program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] { func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp32")]; tensor model_model_embed_tokens_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155582592))))[name = string("model_model_embed_tokens_weight_quantized")]; tensor model_model_layers_0_input_layernorm_weight = const()[name = string("model_model_layers_0_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194478272)))]; tensor model_model_layers_0_self_attn_q_proj_bias = const()[name = string("model_model_layers_0_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194486528)))]; tensor model_model_layers_0_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194494784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196592000))))[name = string("model_model_layers_0_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_0_self_attn_k_proj_bias = const()[name = string("model_model_layers_0_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197116352)))]; tensor model_model_layers_0_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197117440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197379648))))[name = string("model_model_layers_0_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_0_self_attn_v_proj_bias = const()[name = string("model_model_layers_0_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197445248)))]; tensor model_model_layers_0_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197446336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197708544))))[name = string("model_model_layers_0_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_0_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197774144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199871360))))[name = string("model_model_layers_0_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_0_post_attention_layernorm_weight = const()[name = string("model_model_layers_0_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200395712)))]; tensor model_model_layers_0_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200403968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211676224))))[name = string("model_model_layers_0_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_0_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214494336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225766592))))[name = string("model_model_layers_0_mlp_up_proj_weight_quantized")]; tensor model_model_layers_0_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228584704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239856960))))[name = string("model_model_layers_0_mlp_down_proj_weight_quantized")]; tensor model_model_layers_1_input_layernorm_weight = const()[name = string("model_model_layers_1_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242675072)))]; tensor model_model_layers_1_self_attn_q_proj_bias = const()[name = string("model_model_layers_1_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242683328)))]; tensor model_model_layers_1_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242691584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244788800))))[name = string("model_model_layers_1_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_1_self_attn_k_proj_bias = const()[name = string("model_model_layers_1_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245313152)))]; tensor model_model_layers_1_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245314240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245576448))))[name = string("model_model_layers_1_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_1_self_attn_v_proj_bias = const()[name = string("model_model_layers_1_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245642048)))]; tensor model_model_layers_1_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245643136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245905344))))[name = string("model_model_layers_1_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_1_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245970944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248068160))))[name = string("model_model_layers_1_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_1_post_attention_layernorm_weight = const()[name = string("model_model_layers_1_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248592512)))]; tensor model_model_layers_1_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248600768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259873024))))[name = string("model_model_layers_1_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_1_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262691136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273963392))))[name = string("model_model_layers_1_mlp_up_proj_weight_quantized")]; tensor model_model_layers_1_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276781504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288053760))))[name = string("model_model_layers_1_mlp_down_proj_weight_quantized")]; tensor model_model_layers_2_input_layernorm_weight = const()[name = string("model_model_layers_2_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290871872)))]; tensor model_model_layers_2_self_attn_q_proj_bias = const()[name = string("model_model_layers_2_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290880128)))]; tensor model_model_layers_2_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290888384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292985600))))[name = string("model_model_layers_2_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_2_self_attn_k_proj_bias = const()[name = string("model_model_layers_2_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293509952)))]; tensor model_model_layers_2_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293511040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293773248))))[name = string("model_model_layers_2_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_2_self_attn_v_proj_bias = const()[name = string("model_model_layers_2_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293838848)))]; tensor model_model_layers_2_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293839936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294102144))))[name = string("model_model_layers_2_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_2_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294167744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296264960))))[name = string("model_model_layers_2_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_2_post_attention_layernorm_weight = const()[name = string("model_model_layers_2_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296789312)))]; tensor model_model_layers_2_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296797568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308069824))))[name = string("model_model_layers_2_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_2_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310887936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322160192))))[name = string("model_model_layers_2_mlp_up_proj_weight_quantized")]; tensor model_model_layers_2_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324978304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336250560))))[name = string("model_model_layers_2_mlp_down_proj_weight_quantized")]; tensor model_model_layers_3_input_layernorm_weight = const()[name = string("model_model_layers_3_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339068672)))]; tensor model_model_layers_3_self_attn_q_proj_bias = const()[name = string("model_model_layers_3_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339076928)))]; tensor model_model_layers_3_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339085184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341182400))))[name = string("model_model_layers_3_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_3_self_attn_k_proj_bias = const()[name = string("model_model_layers_3_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341706752)))]; tensor model_model_layers_3_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341707840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341970048))))[name = string("model_model_layers_3_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_3_self_attn_v_proj_bias = const()[name = string("model_model_layers_3_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342035648)))]; tensor model_model_layers_3_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342036736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342298944))))[name = string("model_model_layers_3_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_3_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342364544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344461760))))[name = string("model_model_layers_3_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_3_post_attention_layernorm_weight = const()[name = string("model_model_layers_3_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344986112)))]; tensor model_model_layers_3_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344994368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356266624))))[name = string("model_model_layers_3_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_3_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359084736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370356992))))[name = string("model_model_layers_3_mlp_up_proj_weight_quantized")]; tensor model_model_layers_3_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373175104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384447360))))[name = string("model_model_layers_3_mlp_down_proj_weight_quantized")]; tensor model_model_layers_4_input_layernorm_weight = const()[name = string("model_model_layers_4_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387265472)))]; tensor model_model_layers_4_self_attn_q_proj_bias = const()[name = string("model_model_layers_4_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387273728)))]; tensor model_model_layers_4_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387281984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389379200))))[name = string("model_model_layers_4_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_4_self_attn_k_proj_bias = const()[name = string("model_model_layers_4_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389903552)))]; tensor model_model_layers_4_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389904640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390166848))))[name = string("model_model_layers_4_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_4_self_attn_v_proj_bias = const()[name = string("model_model_layers_4_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390232448)))]; tensor model_model_layers_4_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390233536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390495744))))[name = string("model_model_layers_4_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_4_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390561344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392658560))))[name = string("model_model_layers_4_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_4_post_attention_layernorm_weight = const()[name = string("model_model_layers_4_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393182912)))]; tensor model_model_layers_4_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393191168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404463424))))[name = string("model_model_layers_4_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_4_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407281536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418553792))))[name = string("model_model_layers_4_mlp_up_proj_weight_quantized")]; tensor model_model_layers_4_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421371904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432644160))))[name = string("model_model_layers_4_mlp_down_proj_weight_quantized")]; tensor model_model_layers_5_input_layernorm_weight = const()[name = string("model_model_layers_5_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435462272)))]; tensor model_model_layers_5_self_attn_q_proj_bias = const()[name = string("model_model_layers_5_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435470528)))]; tensor model_model_layers_5_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435478784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437576000))))[name = string("model_model_layers_5_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_5_self_attn_k_proj_bias = const()[name = string("model_model_layers_5_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438100352)))]; tensor model_model_layers_5_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438101440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438363648))))[name = string("model_model_layers_5_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_5_self_attn_v_proj_bias = const()[name = string("model_model_layers_5_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438429248)))]; tensor model_model_layers_5_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438430336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438692544))))[name = string("model_model_layers_5_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_5_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438758144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440855360))))[name = string("model_model_layers_5_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_5_post_attention_layernorm_weight = const()[name = string("model_model_layers_5_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441379712)))]; tensor model_model_layers_5_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441387968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452660224))))[name = string("model_model_layers_5_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_5_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455478336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466750592))))[name = string("model_model_layers_5_mlp_up_proj_weight_quantized")]; tensor model_model_layers_5_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469568704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480840960))))[name = string("model_model_layers_5_mlp_down_proj_weight_quantized")]; tensor model_model_layers_6_input_layernorm_weight = const()[name = string("model_model_layers_6_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483659072)))]; tensor model_model_layers_6_self_attn_q_proj_bias = const()[name = string("model_model_layers_6_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483667328)))]; tensor model_model_layers_6_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483675584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485772800))))[name = string("model_model_layers_6_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_6_self_attn_k_proj_bias = const()[name = string("model_model_layers_6_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486297152)))]; tensor model_model_layers_6_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486298240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486560448))))[name = string("model_model_layers_6_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_6_self_attn_v_proj_bias = const()[name = string("model_model_layers_6_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486626048)))]; tensor model_model_layers_6_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486627136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486889344))))[name = string("model_model_layers_6_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_6_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486954944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489052160))))[name = string("model_model_layers_6_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_6_post_attention_layernorm_weight = const()[name = string("model_model_layers_6_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489576512)))]; tensor model_model_layers_6_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489584768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500857024))))[name = string("model_model_layers_6_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_6_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503675136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514947392))))[name = string("model_model_layers_6_mlp_up_proj_weight_quantized")]; tensor model_model_layers_6_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517765504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529037760))))[name = string("model_model_layers_6_mlp_down_proj_weight_quantized")]; tensor model_model_layers_7_input_layernorm_weight = const()[name = string("model_model_layers_7_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531855872)))]; tensor model_model_layers_7_self_attn_q_proj_bias = const()[name = string("model_model_layers_7_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531864128)))]; tensor model_model_layers_7_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531872384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533969600))))[name = string("model_model_layers_7_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_7_self_attn_k_proj_bias = const()[name = string("model_model_layers_7_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534493952)))]; tensor model_model_layers_7_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534495040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534757248))))[name = string("model_model_layers_7_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_7_self_attn_v_proj_bias = const()[name = string("model_model_layers_7_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534822848)))]; tensor model_model_layers_7_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534823936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535086144))))[name = string("model_model_layers_7_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_7_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535151744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537248960))))[name = string("model_model_layers_7_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_7_post_attention_layernorm_weight = const()[name = string("model_model_layers_7_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537773312)))]; tensor model_model_layers_7_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537781568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549053824))))[name = string("model_model_layers_7_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_7_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551871936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563144192))))[name = string("model_model_layers_7_mlp_up_proj_weight_quantized")]; tensor model_model_layers_7_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565962304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577234560))))[name = string("model_model_layers_7_mlp_down_proj_weight_quantized")]; tensor model_model_layers_8_input_layernorm_weight = const()[name = string("model_model_layers_8_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580052672)))]; tensor model_model_layers_8_self_attn_q_proj_bias = const()[name = string("model_model_layers_8_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580060928)))]; tensor model_model_layers_8_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580069184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582166400))))[name = string("model_model_layers_8_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_8_self_attn_k_proj_bias = const()[name = string("model_model_layers_8_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582690752)))]; tensor model_model_layers_8_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582691840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582954048))))[name = string("model_model_layers_8_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_8_self_attn_v_proj_bias = const()[name = string("model_model_layers_8_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583019648)))]; tensor model_model_layers_8_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583020736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583282944))))[name = string("model_model_layers_8_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_8_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583348544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585445760))))[name = string("model_model_layers_8_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_8_post_attention_layernorm_weight = const()[name = string("model_model_layers_8_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585970112)))]; tensor model_model_layers_8_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585978368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597250624))))[name = string("model_model_layers_8_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_8_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600068736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(611340992))))[name = string("model_model_layers_8_mlp_up_proj_weight_quantized")]; tensor model_model_layers_8_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614159104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625431360))))[name = string("model_model_layers_8_mlp_down_proj_weight_quantized")]; tensor model_model_layers_9_input_layernorm_weight = const()[name = string("model_model_layers_9_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628249472)))]; tensor model_model_layers_9_self_attn_q_proj_bias = const()[name = string("model_model_layers_9_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628257728)))]; tensor model_model_layers_9_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628265984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630363200))))[name = string("model_model_layers_9_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_9_self_attn_k_proj_bias = const()[name = string("model_model_layers_9_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630887552)))]; tensor model_model_layers_9_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630888640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631150848))))[name = string("model_model_layers_9_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_9_self_attn_v_proj_bias = const()[name = string("model_model_layers_9_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631216448)))]; tensor model_model_layers_9_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631217536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631479744))))[name = string("model_model_layers_9_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_9_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631545344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633642560))))[name = string("model_model_layers_9_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_9_post_attention_layernorm_weight = const()[name = string("model_model_layers_9_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634166912)))]; tensor model_model_layers_9_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634175168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(645447424))))[name = string("model_model_layers_9_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_9_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648265536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659537792))))[name = string("model_model_layers_9_mlp_up_proj_weight_quantized")]; tensor model_model_layers_9_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662355904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673628160))))[name = string("model_model_layers_9_mlp_down_proj_weight_quantized")]; tensor model_model_layers_10_input_layernorm_weight = const()[name = string("model_model_layers_10_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676446272)))]; tensor model_model_layers_10_self_attn_q_proj_bias = const()[name = string("model_model_layers_10_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676454528)))]; tensor model_model_layers_10_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676462784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678560000))))[name = string("model_model_layers_10_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_10_self_attn_k_proj_bias = const()[name = string("model_model_layers_10_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679084352)))]; tensor model_model_layers_10_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679085440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679347648))))[name = string("model_model_layers_10_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_10_self_attn_v_proj_bias = const()[name = string("model_model_layers_10_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679413248)))]; tensor model_model_layers_10_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679414336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679676544))))[name = string("model_model_layers_10_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_10_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679742144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681839360))))[name = string("model_model_layers_10_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_10_post_attention_layernorm_weight = const()[name = string("model_model_layers_10_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682363712)))]; tensor model_model_layers_10_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682371968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(693644224))))[name = string("model_model_layers_10_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_10_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696462336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707734592))))[name = string("model_model_layers_10_mlp_up_proj_weight_quantized")]; tensor model_model_layers_10_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(710552704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721824960))))[name = string("model_model_layers_10_mlp_down_proj_weight_quantized")]; tensor model_model_layers_11_input_layernorm_weight = const()[name = string("model_model_layers_11_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724643072)))]; tensor model_model_layers_11_self_attn_q_proj_bias = const()[name = string("model_model_layers_11_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724651328)))]; tensor model_model_layers_11_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724659584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726756800))))[name = string("model_model_layers_11_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_11_self_attn_k_proj_bias = const()[name = string("model_model_layers_11_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727281152)))]; tensor model_model_layers_11_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727282240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727544448))))[name = string("model_model_layers_11_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_11_self_attn_v_proj_bias = const()[name = string("model_model_layers_11_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727610048)))]; tensor model_model_layers_11_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727611136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727873344))))[name = string("model_model_layers_11_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_11_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727938944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730036160))))[name = string("model_model_layers_11_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_11_post_attention_layernorm_weight = const()[name = string("model_model_layers_11_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730560512)))]; tensor model_model_layers_11_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(730568768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741841024))))[name = string("model_model_layers_11_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_11_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744659136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755931392))))[name = string("model_model_layers_11_mlp_up_proj_weight_quantized")]; tensor model_model_layers_11_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758749504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770021760))))[name = string("model_model_layers_11_mlp_down_proj_weight_quantized")]; tensor model_model_layers_12_input_layernorm_weight = const()[name = string("model_model_layers_12_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772839872)))]; tensor model_model_layers_12_self_attn_q_proj_bias = const()[name = string("model_model_layers_12_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772848128)))]; tensor model_model_layers_12_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772856384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774953600))))[name = string("model_model_layers_12_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_12_self_attn_k_proj_bias = const()[name = string("model_model_layers_12_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775477952)))]; tensor model_model_layers_12_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775479040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775741248))))[name = string("model_model_layers_12_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_12_self_attn_v_proj_bias = const()[name = string("model_model_layers_12_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775806848)))]; tensor model_model_layers_12_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(775807936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776070144))))[name = string("model_model_layers_12_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_12_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776135744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778232960))))[name = string("model_model_layers_12_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_12_post_attention_layernorm_weight = const()[name = string("model_model_layers_12_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778757312)))]; tensor model_model_layers_12_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778765568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790037824))))[name = string("model_model_layers_12_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_12_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792855936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804128192))))[name = string("model_model_layers_12_mlp_up_proj_weight_quantized")]; tensor model_model_layers_12_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(806946304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818218560))))[name = string("model_model_layers_12_mlp_down_proj_weight_quantized")]; tensor model_model_layers_13_input_layernorm_weight = const()[name = string("model_model_layers_13_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821036672)))]; tensor model_model_layers_13_self_attn_q_proj_bias = const()[name = string("model_model_layers_13_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821044928)))]; tensor model_model_layers_13_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821053184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823150400))))[name = string("model_model_layers_13_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_13_self_attn_k_proj_bias = const()[name = string("model_model_layers_13_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823674752)))]; tensor model_model_layers_13_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823675840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823938048))))[name = string("model_model_layers_13_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_13_self_attn_v_proj_bias = const()[name = string("model_model_layers_13_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824003648)))]; tensor model_model_layers_13_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824004736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824266944))))[name = string("model_model_layers_13_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_13_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(824332544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826429760))))[name = string("model_model_layers_13_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_13_post_attention_layernorm_weight = const()[name = string("model_model_layers_13_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826954112)))]; tensor model_model_layers_13_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826962368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(838234624))))[name = string("model_model_layers_13_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_13_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(841052736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852324992))))[name = string("model_model_layers_13_mlp_up_proj_weight_quantized")]; tensor model_model_layers_13_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(855143104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(866415360))))[name = string("model_model_layers_13_mlp_down_proj_weight_quantized")]; tensor model_model_layers_14_input_layernorm_weight = const()[name = string("model_model_layers_14_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(869233472)))]; tensor model_model_layers_14_self_attn_q_proj_bias = const()[name = string("model_model_layers_14_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(869241728)))]; tensor model_model_layers_14_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(869249984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871347200))))[name = string("model_model_layers_14_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_14_self_attn_k_proj_bias = const()[name = string("model_model_layers_14_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871871552)))]; tensor model_model_layers_14_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871872640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872134848))))[name = string("model_model_layers_14_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_14_self_attn_v_proj_bias = const()[name = string("model_model_layers_14_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872200448)))]; tensor model_model_layers_14_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872201536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872463744))))[name = string("model_model_layers_14_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_14_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872529344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874626560))))[name = string("model_model_layers_14_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_14_post_attention_layernorm_weight = const()[name = string("model_model_layers_14_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875150912)))]; tensor model_model_layers_14_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875159168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886431424))))[name = string("model_model_layers_14_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_14_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889249536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(900521792))))[name = string("model_model_layers_14_mlp_up_proj_weight_quantized")]; tensor model_model_layers_14_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(903339904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914612160))))[name = string("model_model_layers_14_mlp_down_proj_weight_quantized")]; tensor model_model_layers_15_input_layernorm_weight = const()[name = string("model_model_layers_15_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917430272)))]; tensor model_model_layers_15_self_attn_q_proj_bias = const()[name = string("model_model_layers_15_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917438528)))]; tensor model_model_layers_15_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917446784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(919544000))))[name = string("model_model_layers_15_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_15_self_attn_k_proj_bias = const()[name = string("model_model_layers_15_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920068352)))]; tensor model_model_layers_15_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920069440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920331648))))[name = string("model_model_layers_15_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_15_self_attn_v_proj_bias = const()[name = string("model_model_layers_15_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920397248)))]; tensor model_model_layers_15_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920398336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920660544))))[name = string("model_model_layers_15_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_15_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920726144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922823360))))[name = string("model_model_layers_15_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_15_post_attention_layernorm_weight = const()[name = string("model_model_layers_15_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923347712)))]; tensor model_model_layers_15_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923355968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934628224))))[name = string("model_model_layers_15_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_15_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937446336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948718592))))[name = string("model_model_layers_15_mlp_up_proj_weight_quantized")]; tensor model_model_layers_15_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951536704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(962808960))))[name = string("model_model_layers_15_mlp_down_proj_weight_quantized")]; tensor model_model_layers_16_input_layernorm_weight = const()[name = string("model_model_layers_16_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(965627072)))]; tensor model_model_layers_16_self_attn_q_proj_bias = const()[name = string("model_model_layers_16_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(965635328)))]; tensor model_model_layers_16_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(965643584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967740800))))[name = string("model_model_layers_16_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_16_self_attn_k_proj_bias = const()[name = string("model_model_layers_16_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968265152)))]; tensor model_model_layers_16_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968266240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968528448))))[name = string("model_model_layers_16_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_16_self_attn_v_proj_bias = const()[name = string("model_model_layers_16_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968594048)))]; tensor model_model_layers_16_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968595136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968857344))))[name = string("model_model_layers_16_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_16_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(968922944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971020160))))[name = string("model_model_layers_16_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_16_post_attention_layernorm_weight = const()[name = string("model_model_layers_16_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971544512)))]; tensor model_model_layers_16_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971552768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(982825024))))[name = string("model_model_layers_16_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_16_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(985643136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(996915392))))[name = string("model_model_layers_16_mlp_up_proj_weight_quantized")]; tensor model_model_layers_16_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(999733504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1011005760))))[name = string("model_model_layers_16_mlp_down_proj_weight_quantized")]; tensor model_model_layers_17_input_layernorm_weight = const()[name = string("model_model_layers_17_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013823872)))]; tensor model_model_layers_17_self_attn_q_proj_bias = const()[name = string("model_model_layers_17_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013832128)))]; tensor model_model_layers_17_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013840384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1015937600))))[name = string("model_model_layers_17_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_17_self_attn_k_proj_bias = const()[name = string("model_model_layers_17_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1016461952)))]; tensor model_model_layers_17_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1016463040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1016725248))))[name = string("model_model_layers_17_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_17_self_attn_v_proj_bias = const()[name = string("model_model_layers_17_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1016790848)))]; tensor model_model_layers_17_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1016791936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017054144))))[name = string("model_model_layers_17_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_17_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017119744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1019216960))))[name = string("model_model_layers_17_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_17_post_attention_layernorm_weight = const()[name = string("model_model_layers_17_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1019741312)))]; tensor model_model_layers_17_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1019749568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1031021824))))[name = string("model_model_layers_17_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_17_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033839936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045112192))))[name = string("model_model_layers_17_mlp_up_proj_weight_quantized")]; tensor model_model_layers_17_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1047930304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059202560))))[name = string("model_model_layers_17_mlp_down_proj_weight_quantized")]; tensor model_model_layers_18_input_layernorm_weight = const()[name = string("model_model_layers_18_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1062020672)))]; tensor model_model_layers_18_self_attn_q_proj_bias = const()[name = string("model_model_layers_18_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1062028928)))]; tensor model_model_layers_18_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1062037184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064134400))))[name = string("model_model_layers_18_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_18_self_attn_k_proj_bias = const()[name = string("model_model_layers_18_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064658752)))]; tensor model_model_layers_18_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064659840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064922048))))[name = string("model_model_layers_18_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_18_self_attn_v_proj_bias = const()[name = string("model_model_layers_18_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064987648)))]; tensor model_model_layers_18_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064988736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065250944))))[name = string("model_model_layers_18_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_18_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065316544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1067413760))))[name = string("model_model_layers_18_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_18_post_attention_layernorm_weight = const()[name = string("model_model_layers_18_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1067938112)))]; tensor model_model_layers_18_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1067946368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1079218624))))[name = string("model_model_layers_18_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_18_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1082036736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093308992))))[name = string("model_model_layers_18_mlp_up_proj_weight_quantized")]; tensor model_model_layers_18_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096127104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1107399360))))[name = string("model_model_layers_18_mlp_down_proj_weight_quantized")]; tensor model_model_layers_19_input_layernorm_weight = const()[name = string("model_model_layers_19_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110217472)))]; tensor model_model_layers_19_self_attn_q_proj_bias = const()[name = string("model_model_layers_19_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110225728)))]; tensor model_model_layers_19_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110233984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112331200))))[name = string("model_model_layers_19_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_19_self_attn_k_proj_bias = const()[name = string("model_model_layers_19_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112855552)))]; tensor model_model_layers_19_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112856640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1113118848))))[name = string("model_model_layers_19_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_19_self_attn_v_proj_bias = const()[name = string("model_model_layers_19_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1113184448)))]; tensor model_model_layers_19_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1113185536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1113447744))))[name = string("model_model_layers_19_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_19_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1113513344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115610560))))[name = string("model_model_layers_19_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_19_post_attention_layernorm_weight = const()[name = string("model_model_layers_19_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1116134912)))]; tensor model_model_layers_19_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1116143168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1127415424))))[name = string("model_model_layers_19_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_19_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1130233536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1141505792))))[name = string("model_model_layers_19_mlp_up_proj_weight_quantized")]; tensor model_model_layers_19_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1144323904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1155596160))))[name = string("model_model_layers_19_mlp_down_proj_weight_quantized")]; tensor model_model_layers_20_input_layernorm_weight = const()[name = string("model_model_layers_20_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158414272)))]; tensor model_model_layers_20_self_attn_q_proj_bias = const()[name = string("model_model_layers_20_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158422528)))]; tensor model_model_layers_20_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158430784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1160528000))))[name = string("model_model_layers_20_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_20_self_attn_k_proj_bias = const()[name = string("model_model_layers_20_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161052352)))]; tensor model_model_layers_20_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161053440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161315648))))[name = string("model_model_layers_20_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_20_self_attn_v_proj_bias = const()[name = string("model_model_layers_20_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161381248)))]; tensor model_model_layers_20_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161382336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161644544))))[name = string("model_model_layers_20_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_20_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161710144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1163807360))))[name = string("model_model_layers_20_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_20_post_attention_layernorm_weight = const()[name = string("model_model_layers_20_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164331712)))]; tensor model_model_layers_20_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164339968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1175612224))))[name = string("model_model_layers_20_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_20_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1178430336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1189702592))))[name = string("model_model_layers_20_mlp_up_proj_weight_quantized")]; tensor model_model_layers_20_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192520704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1203792960))))[name = string("model_model_layers_20_mlp_down_proj_weight_quantized")]; tensor model_model_layers_21_input_layernorm_weight = const()[name = string("model_model_layers_21_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1206611072)))]; tensor model_model_layers_21_self_attn_q_proj_bias = const()[name = string("model_model_layers_21_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1206619328)))]; tensor model_model_layers_21_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1206627584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1208724800))))[name = string("model_model_layers_21_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_21_self_attn_k_proj_bias = const()[name = string("model_model_layers_21_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209249152)))]; tensor model_model_layers_21_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209250240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209512448))))[name = string("model_model_layers_21_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_21_self_attn_v_proj_bias = const()[name = string("model_model_layers_21_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209578048)))]; tensor model_model_layers_21_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209579136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209841344))))[name = string("model_model_layers_21_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_21_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209906944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1212004160))))[name = string("model_model_layers_21_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_21_post_attention_layernorm_weight = const()[name = string("model_model_layers_21_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1212528512)))]; tensor model_model_layers_21_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1212536768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1223809024))))[name = string("model_model_layers_21_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_21_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1226627136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1237899392))))[name = string("model_model_layers_21_mlp_up_proj_weight_quantized")]; tensor model_model_layers_21_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240717504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1251989760))))[name = string("model_model_layers_21_mlp_down_proj_weight_quantized")]; tensor model_model_layers_22_input_layernorm_weight = const()[name = string("model_model_layers_22_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1254807872)))]; tensor model_model_layers_22_self_attn_q_proj_bias = const()[name = string("model_model_layers_22_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1254816128)))]; tensor model_model_layers_22_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1254824384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256921600))))[name = string("model_model_layers_22_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_22_self_attn_k_proj_bias = const()[name = string("model_model_layers_22_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1257445952)))]; tensor model_model_layers_22_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1257447040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1257709248))))[name = string("model_model_layers_22_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_22_self_attn_v_proj_bias = const()[name = string("model_model_layers_22_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1257774848)))]; tensor model_model_layers_22_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1257775936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1258038144))))[name = string("model_model_layers_22_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_22_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1258103744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1260200960))))[name = string("model_model_layers_22_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_22_post_attention_layernorm_weight = const()[name = string("model_model_layers_22_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1260725312)))]; tensor model_model_layers_22_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1260733568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272005824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_22_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1274823936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286096192))))[name = string("model_model_layers_22_mlp_up_proj_weight_quantized")]; tensor model_model_layers_22_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1288914304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1300186560))))[name = string("model_model_layers_22_mlp_down_proj_weight_quantized")]; tensor model_model_layers_23_input_layernorm_weight = const()[name = string("model_model_layers_23_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1303004672)))]; tensor model_model_layers_23_self_attn_q_proj_bias = const()[name = string("model_model_layers_23_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1303012928)))]; tensor model_model_layers_23_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1303021184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305118400))))[name = string("model_model_layers_23_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_23_self_attn_k_proj_bias = const()[name = string("model_model_layers_23_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305642752)))]; tensor model_model_layers_23_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305643840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305906048))))[name = string("model_model_layers_23_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_23_self_attn_v_proj_bias = const()[name = string("model_model_layers_23_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305971648)))]; tensor model_model_layers_23_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305972736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1306234944))))[name = string("model_model_layers_23_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_23_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1306300544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1308397760))))[name = string("model_model_layers_23_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_23_post_attention_layernorm_weight = const()[name = string("model_model_layers_23_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1308922112)))]; tensor model_model_layers_23_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1308930368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1320202624))))[name = string("model_model_layers_23_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_23_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1323020736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1334292992))))[name = string("model_model_layers_23_mlp_up_proj_weight_quantized")]; tensor model_model_layers_23_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1337111104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348383360))))[name = string("model_model_layers_23_mlp_down_proj_weight_quantized")]; tensor model_model_layers_24_input_layernorm_weight = const()[name = string("model_model_layers_24_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351201472)))]; tensor model_model_layers_24_self_attn_q_proj_bias = const()[name = string("model_model_layers_24_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351209728)))]; tensor model_model_layers_24_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351217984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1353315200))))[name = string("model_model_layers_24_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_24_self_attn_k_proj_bias = const()[name = string("model_model_layers_24_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1353839552)))]; tensor model_model_layers_24_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1353840640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1354102848))))[name = string("model_model_layers_24_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_24_self_attn_v_proj_bias = const()[name = string("model_model_layers_24_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1354168448)))]; tensor model_model_layers_24_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1354169536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1354431744))))[name = string("model_model_layers_24_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_24_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1354497344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1356594560))))[name = string("model_model_layers_24_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_24_post_attention_layernorm_weight = const()[name = string("model_model_layers_24_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1357118912)))]; tensor model_model_layers_24_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1357127168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1368399424))))[name = string("model_model_layers_24_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_24_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1371217536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1382489792))))[name = string("model_model_layers_24_mlp_up_proj_weight_quantized")]; tensor model_model_layers_24_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1385307904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1396580160))))[name = string("model_model_layers_24_mlp_down_proj_weight_quantized")]; tensor model_model_layers_25_input_layernorm_weight = const()[name = string("model_model_layers_25_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1399398272)))]; tensor model_model_layers_25_self_attn_q_proj_bias = const()[name = string("model_model_layers_25_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1399406528)))]; tensor model_model_layers_25_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1399414784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1401512000))))[name = string("model_model_layers_25_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_25_self_attn_k_proj_bias = const()[name = string("model_model_layers_25_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402036352)))]; tensor model_model_layers_25_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402037440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402299648))))[name = string("model_model_layers_25_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_25_self_attn_v_proj_bias = const()[name = string("model_model_layers_25_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402365248)))]; tensor model_model_layers_25_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402366336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402628544))))[name = string("model_model_layers_25_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_25_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1402694144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1404791360))))[name = string("model_model_layers_25_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_25_post_attention_layernorm_weight = const()[name = string("model_model_layers_25_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1405315712)))]; tensor model_model_layers_25_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1405323968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1416596224))))[name = string("model_model_layers_25_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_25_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1419414336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430686592))))[name = string("model_model_layers_25_mlp_up_proj_weight_quantized")]; tensor model_model_layers_25_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433504704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1444776960))))[name = string("model_model_layers_25_mlp_down_proj_weight_quantized")]; tensor model_model_layers_26_input_layernorm_weight = const()[name = string("model_model_layers_26_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1447595072)))]; tensor model_model_layers_26_self_attn_q_proj_bias = const()[name = string("model_model_layers_26_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1447603328)))]; tensor model_model_layers_26_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1447611584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1449708800))))[name = string("model_model_layers_26_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_26_self_attn_k_proj_bias = const()[name = string("model_model_layers_26_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450233152)))]; tensor model_model_layers_26_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450234240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450496448))))[name = string("model_model_layers_26_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_26_self_attn_v_proj_bias = const()[name = string("model_model_layers_26_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450562048)))]; tensor model_model_layers_26_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450563136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450825344))))[name = string("model_model_layers_26_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_26_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1450890944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1452988160))))[name = string("model_model_layers_26_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_26_post_attention_layernorm_weight = const()[name = string("model_model_layers_26_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1453512512)))]; tensor model_model_layers_26_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1453520768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1464793024))))[name = string("model_model_layers_26_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_26_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1467611136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1478883392))))[name = string("model_model_layers_26_mlp_up_proj_weight_quantized")]; tensor model_model_layers_26_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1481701504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1492973760))))[name = string("model_model_layers_26_mlp_down_proj_weight_quantized")]; tensor model_model_layers_27_input_layernorm_weight = const()[name = string("model_model_layers_27_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1495791872)))]; tensor model_model_layers_27_self_attn_q_proj_bias = const()[name = string("model_model_layers_27_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1495800128)))]; tensor model_model_layers_27_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1495808384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1497905600))))[name = string("model_model_layers_27_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_27_self_attn_k_proj_bias = const()[name = string("model_model_layers_27_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1498429952)))]; tensor model_model_layers_27_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1498431040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1498693248))))[name = string("model_model_layers_27_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_27_self_attn_v_proj_bias = const()[name = string("model_model_layers_27_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1498758848)))]; tensor model_model_layers_27_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1498759936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1499022144))))[name = string("model_model_layers_27_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_27_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1499087744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1501184960))))[name = string("model_model_layers_27_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_27_post_attention_layernorm_weight = const()[name = string("model_model_layers_27_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1501709312)))]; tensor model_model_layers_27_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1501717568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1512989824))))[name = string("model_model_layers_27_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_27_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515807936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1527080192))))[name = string("model_model_layers_27_mlp_up_proj_weight_quantized")]; tensor model_model_layers_27_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1529898304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1541170560))))[name = string("model_model_layers_27_mlp_down_proj_weight_quantized")]; tensor model_model_layers_28_input_layernorm_weight = const()[name = string("model_model_layers_28_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1543988672)))]; tensor model_model_layers_28_self_attn_q_proj_bias = const()[name = string("model_model_layers_28_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1543996928)))]; tensor model_model_layers_28_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1544005184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546102400))))[name = string("model_model_layers_28_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_28_self_attn_k_proj_bias = const()[name = string("model_model_layers_28_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546626752)))]; tensor model_model_layers_28_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546627840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546890048))))[name = string("model_model_layers_28_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_28_self_attn_v_proj_bias = const()[name = string("model_model_layers_28_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546955648)))]; tensor model_model_layers_28_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1546956736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1547218944))))[name = string("model_model_layers_28_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_28_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1547284544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1549381760))))[name = string("model_model_layers_28_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_28_post_attention_layernorm_weight = const()[name = string("model_model_layers_28_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1549906112)))]; tensor model_model_layers_28_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1549914368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561186624))))[name = string("model_model_layers_28_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_28_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564004736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575276992))))[name = string("model_model_layers_28_mlp_up_proj_weight_quantized")]; tensor model_model_layers_28_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578095104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1589367360))))[name = string("model_model_layers_28_mlp_down_proj_weight_quantized")]; tensor model_model_layers_29_input_layernorm_weight = const()[name = string("model_model_layers_29_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1592185472)))]; tensor model_model_layers_29_self_attn_q_proj_bias = const()[name = string("model_model_layers_29_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1592193728)))]; tensor model_model_layers_29_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1592201984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594299200))))[name = string("model_model_layers_29_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_29_self_attn_k_proj_bias = const()[name = string("model_model_layers_29_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594823552)))]; tensor model_model_layers_29_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594824640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1595086848))))[name = string("model_model_layers_29_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_29_self_attn_v_proj_bias = const()[name = string("model_model_layers_29_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1595152448)))]; tensor model_model_layers_29_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1595153536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1595415744))))[name = string("model_model_layers_29_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_29_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1595481344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1597578560))))[name = string("model_model_layers_29_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_29_post_attention_layernorm_weight = const()[name = string("model_model_layers_29_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1598102912)))]; tensor model_model_layers_29_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1598111168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1609383424))))[name = string("model_model_layers_29_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_29_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1612201536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1623473792))))[name = string("model_model_layers_29_mlp_up_proj_weight_quantized")]; tensor model_model_layers_29_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1626291904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1637564160))))[name = string("model_model_layers_29_mlp_down_proj_weight_quantized")]; tensor model_model_layers_30_input_layernorm_weight = const()[name = string("model_model_layers_30_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1640382272)))]; tensor model_model_layers_30_self_attn_q_proj_bias = const()[name = string("model_model_layers_30_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1640390528)))]; tensor model_model_layers_30_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1640398784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1642496000))))[name = string("model_model_layers_30_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_30_self_attn_k_proj_bias = const()[name = string("model_model_layers_30_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643020352)))]; tensor model_model_layers_30_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643021440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643283648))))[name = string("model_model_layers_30_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_30_self_attn_v_proj_bias = const()[name = string("model_model_layers_30_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643349248)))]; tensor model_model_layers_30_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643350336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643612544))))[name = string("model_model_layers_30_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_30_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1643678144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1645775360))))[name = string("model_model_layers_30_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_30_post_attention_layernorm_weight = const()[name = string("model_model_layers_30_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1646299712)))]; tensor model_model_layers_30_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1646307968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1657580224))))[name = string("model_model_layers_30_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_30_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1660398336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1671670592))))[name = string("model_model_layers_30_mlp_up_proj_weight_quantized")]; tensor model_model_layers_30_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1674488704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1685760960))))[name = string("model_model_layers_30_mlp_down_proj_weight_quantized")]; tensor model_model_layers_31_input_layernorm_weight = const()[name = string("model_model_layers_31_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1688579072)))]; tensor model_model_layers_31_self_attn_q_proj_bias = const()[name = string("model_model_layers_31_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1688587328)))]; tensor model_model_layers_31_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1688595584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1690692800))))[name = string("model_model_layers_31_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_31_self_attn_k_proj_bias = const()[name = string("model_model_layers_31_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691217152)))]; tensor model_model_layers_31_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691218240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691480448))))[name = string("model_model_layers_31_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_31_self_attn_v_proj_bias = const()[name = string("model_model_layers_31_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691546048)))]; tensor model_model_layers_31_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691547136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691809344))))[name = string("model_model_layers_31_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_31_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1691874944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1693972160))))[name = string("model_model_layers_31_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_31_post_attention_layernorm_weight = const()[name = string("model_model_layers_31_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1694496512)))]; tensor model_model_layers_31_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1694504768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1705777024))))[name = string("model_model_layers_31_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_31_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1708595136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1719867392))))[name = string("model_model_layers_31_mlp_up_proj_weight_quantized")]; tensor model_model_layers_31_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1722685504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1733957760))))[name = string("model_model_layers_31_mlp_down_proj_weight_quantized")]; tensor model_model_layers_32_input_layernorm_weight = const()[name = string("model_model_layers_32_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1736775872)))]; tensor model_model_layers_32_self_attn_q_proj_bias = const()[name = string("model_model_layers_32_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1736784128)))]; tensor model_model_layers_32_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1736792384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1738889600))))[name = string("model_model_layers_32_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_32_self_attn_k_proj_bias = const()[name = string("model_model_layers_32_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1739413952)))]; tensor model_model_layers_32_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1739415040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1739677248))))[name = string("model_model_layers_32_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_32_self_attn_v_proj_bias = const()[name = string("model_model_layers_32_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1739742848)))]; tensor model_model_layers_32_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1739743936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1740006144))))[name = string("model_model_layers_32_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_32_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1740071744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1742168960))))[name = string("model_model_layers_32_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_32_post_attention_layernorm_weight = const()[name = string("model_model_layers_32_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1742693312)))]; tensor model_model_layers_32_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1742701568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1753973824))))[name = string("model_model_layers_32_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_32_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1756791936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1768064192))))[name = string("model_model_layers_32_mlp_up_proj_weight_quantized")]; tensor model_model_layers_32_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1770882304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1782154560))))[name = string("model_model_layers_32_mlp_down_proj_weight_quantized")]; tensor model_model_layers_33_input_layernorm_weight = const()[name = string("model_model_layers_33_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1784972672)))]; tensor model_model_layers_33_self_attn_q_proj_bias = const()[name = string("model_model_layers_33_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1784980928)))]; tensor model_model_layers_33_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1784989184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787086400))))[name = string("model_model_layers_33_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_33_self_attn_k_proj_bias = const()[name = string("model_model_layers_33_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787610752)))]; tensor model_model_layers_33_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787611840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787874048))))[name = string("model_model_layers_33_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_33_self_attn_v_proj_bias = const()[name = string("model_model_layers_33_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787939648)))]; tensor model_model_layers_33_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1787940736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1788202944))))[name = string("model_model_layers_33_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_33_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1788268544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1790365760))))[name = string("model_model_layers_33_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_33_post_attention_layernorm_weight = const()[name = string("model_model_layers_33_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1790890112)))]; tensor model_model_layers_33_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1790898368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1802170624))))[name = string("model_model_layers_33_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_33_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1804988736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1816260992))))[name = string("model_model_layers_33_mlp_up_proj_weight_quantized")]; tensor model_model_layers_33_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1819079104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1830351360))))[name = string("model_model_layers_33_mlp_down_proj_weight_quantized")]; tensor model_model_layers_34_input_layernorm_weight = const()[name = string("model_model_layers_34_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1833169472)))]; tensor model_model_layers_34_self_attn_q_proj_bias = const()[name = string("model_model_layers_34_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1833177728)))]; tensor model_model_layers_34_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1833185984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1835283200))))[name = string("model_model_layers_34_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_34_self_attn_k_proj_bias = const()[name = string("model_model_layers_34_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1835807552)))]; tensor model_model_layers_34_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1835808640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1836070848))))[name = string("model_model_layers_34_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_34_self_attn_v_proj_bias = const()[name = string("model_model_layers_34_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1836136448)))]; tensor model_model_layers_34_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1836137536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1836399744))))[name = string("model_model_layers_34_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_34_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1836465344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1838562560))))[name = string("model_model_layers_34_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_34_post_attention_layernorm_weight = const()[name = string("model_model_layers_34_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1839086912)))]; tensor model_model_layers_34_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1839095168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1850367424))))[name = string("model_model_layers_34_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_34_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1853185536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1864457792))))[name = string("model_model_layers_34_mlp_up_proj_weight_quantized")]; tensor model_model_layers_34_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1867275904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1878548160))))[name = string("model_model_layers_34_mlp_down_proj_weight_quantized")]; tensor model_model_layers_35_input_layernorm_weight = const()[name = string("model_model_layers_35_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1881366272)))]; tensor model_model_layers_35_self_attn_q_proj_bias = const()[name = string("model_model_layers_35_self_attn_q_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1881374528)))]; tensor model_model_layers_35_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1881382784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1883480000))))[name = string("model_model_layers_35_self_attn_q_proj_weight_quantized")]; tensor model_model_layers_35_self_attn_k_proj_bias = const()[name = string("model_model_layers_35_self_attn_k_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884004352)))]; tensor model_model_layers_35_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884005440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884267648))))[name = string("model_model_layers_35_self_attn_k_proj_weight_quantized")]; tensor model_model_layers_35_self_attn_v_proj_bias = const()[name = string("model_model_layers_35_self_attn_v_proj_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884333248)))]; tensor model_model_layers_35_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884334336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884596544))))[name = string("model_model_layers_35_self_attn_v_proj_weight_quantized")]; tensor model_model_layers_35_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1884662144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1886759360))))[name = string("model_model_layers_35_self_attn_o_proj_weight_quantized")]; tensor model_model_layers_35_post_attention_layernorm_weight = const()[name = string("model_model_layers_35_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1887283712)))]; tensor model_model_layers_35_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1887291968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1898564224))))[name = string("model_model_layers_35_mlp_gate_proj_weight_quantized")]; tensor model_model_layers_35_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1901382336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1912654592))))[name = string("model_model_layers_35_mlp_up_proj_weight_quantized")]; tensor model_model_layers_35_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1915472704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1926744960))))[name = string("model_model_layers_35_mlp_down_proj_weight_quantized")]; tensor model_model_norm_weight = const()[name = string("model_model_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1929563072)))]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = causal_mask)[name = string("cast_148")]; tensor var_7_shape = shape(x = cast_0)[name = string("op_7_shape")]; int32 select_0 = const()[name = string("select_0"), val = int32(3)]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; int32 gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0, validate_indices = gather_0_validate_indices_0, x = var_7_shape)[name = string("gather_0")]; tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; int32 select_1 = const()[name = string("select_1"), val = int32(1)]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; int32 gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1, validate_indices = gather_1_validate_indices_0, x = var_10_shape)[name = string("gather_1")]; int32 past_seen_tokens = sub(x = gather_0, y = gather_1)[name = string("past_seen_tokens")]; int32 var_88 = const()[name = string("op_88"), val = int32(-1)]; int32 var_100 = const()[name = string("op_100"), val = int32(8)]; int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; tensor inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_quantized)[name = string("inputs_embeds")]; tensor var_214_shape = shape(x = inputs_embeds)[name = string("op_214_shape")]; int32 select_2 = const()[name = string("select_2"), val = int32(1)]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; int32 gather_2 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2, validate_indices = gather_2_validate_indices_0, x = var_214_shape)[name = string("gather_2")]; int32 var_216 = add(x = past_seen_tokens, y = gather_2)[name = string("op_216")]; int32 const_0 = const()[name = string("const_0"), val = int32(1)]; tensor cache_position = range_1d(end = var_216, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; tensor var_229_axes_0 = const()[name = string("op_229_axes_0"), val = tensor([1])]; tensor var_229 = expand_dims(axes = var_229_axes_0, x = position_ids)[name = string("op_229")]; string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")]; tensor const_2 = const()[name = string("const_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1929571328)))]; bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)]; bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)]; tensor cast_2 = cast(dtype = cast_2_dtype_0, x = var_229)[name = string("cast_147")]; tensor var_234 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = const_2, y = cast_2)[name = string("op_234")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs = transpose(perm = freqs_perm_0, x = var_234)[name = string("transpose_144")]; tensor emb = concat(axis = var_88, interleave = emb_interleave_0, values = (freqs, freqs))[name = string("emb")]; tensor cos_1 = cos(x = emb)[name = string("cos_1")]; tensor sin_1 = sin(x = emb)[name = string("sin_1")]; fp32 var_94_promoted = const()[name = string("op_94_promoted"), val = fp32(0x1p+1)]; tensor var_255 = pow(x = inputs_embeds, y = var_94_promoted)[name = string("op_255")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_255)[name = string("variance_1")]; fp32 var_258 = const()[name = string("op_258"), val = fp32(0x1.0c6f7ap-20)]; tensor var_259 = add(x = variance_1, y = var_258)[name = string("op_259")]; fp32 var_260_epsilon_0 = const()[name = string("op_260_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_260 = rsqrt(epsilon = var_260_epsilon_0, x = var_259)[name = string("op_260")]; tensor hidden_states_3 = mul(x = inputs_embeds, y = var_260)[name = string("hidden_states_3")]; tensor hidden_states_7 = mul(x = model_model_layers_0_input_layernorm_weight, y = hidden_states_3)[name = string("hidden_states_7")]; tensor linear_0 = linear(bias = model_model_layers_0_self_attn_q_proj_bias, weight = model_model_layers_0_self_attn_q_proj_weight_quantized, x = hidden_states_7)[name = string("linear_0")]; tensor linear_1 = linear(bias = model_model_layers_0_self_attn_k_proj_bias, weight = model_model_layers_0_self_attn_k_proj_weight_quantized, x = hidden_states_7)[name = string("linear_1")]; tensor linear_2 = linear(bias = model_model_layers_0_self_attn_v_proj_bias, weight = model_model_layers_0_self_attn_v_proj_weight_quantized, x = hidden_states_7)[name = string("linear_2")]; tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 16, 128])]; tensor var_283 = reshape(shape = concat_0x, x = linear_0)[name = string("op_283")]; tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 2, 128])]; tensor var_286 = reshape(shape = concat_1x, x = linear_1)[name = string("op_286")]; tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 2, 128])]; tensor var_289 = reshape(shape = concat_2x, x = linear_2)[name = string("op_289")]; tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; tensor cos_7 = expand_dims(axes = cos_7_axes_0, x = cos_1)[name = string("cos_7")]; tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; tensor sin_7 = expand_dims(axes = sin_7_axes_0, x = sin_1)[name = string("sin_7")]; tensor q_1 = transpose(perm = q_1_perm_0, x = var_283)[name = string("transpose_143")]; tensor var_293 = mul(x = q_1, y = cos_7)[name = string("op_293")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = string("x2_1")]; fp32 const_3_promoted = const()[name = string("const_3_promoted"), val = fp32(-0x1p+0)]; tensor var_304 = mul(x = x2_1, y = const_3_promoted)[name = string("op_304")]; bool var_306_interleave_0 = const()[name = string("op_306_interleave_0"), val = bool(false)]; tensor var_306 = concat(axis = var_88, interleave = var_306_interleave_0, values = (var_304, x1_1))[name = string("op_306")]; tensor var_307 = mul(x = var_306, y = sin_7)[name = string("op_307")]; tensor query_states_3 = add(x = var_293, y = var_307)[name = string("query_states_3")]; tensor k_1 = transpose(perm = k_1_perm_0, x = var_286)[name = string("transpose_142")]; tensor var_309 = mul(x = k_1, y = cos_7)[name = string("op_309")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = string("x2_3")]; fp32 const_4_promoted = const()[name = string("const_4_promoted"), val = fp32(-0x1p+0)]; tensor var_320 = mul(x = x2_3, y = const_4_promoted)[name = string("op_320")]; bool var_322_interleave_0 = const()[name = string("op_322_interleave_0"), val = bool(false)]; tensor var_322 = concat(axis = var_88, interleave = var_322_interleave_0, values = (var_320, x1_3))[name = string("op_322")]; tensor var_323 = mul(x = var_322, y = sin_7)[name = string("op_323")]; tensor k_state_1 = add(x = var_309, y = var_323)[name = string("k_state_1")]; tensor var_325_shape = shape(x = cache_position)[name = string("op_325_shape")]; int32 select_10 = const()[name = string("select_10"), val = int32(0)]; int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; int32 gather_10 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10, validate_indices = gather_10_validate_indices_0, x = var_325_shape)[name = string("gather_10")]; int32 end_1 = add(x = past_seen_tokens, y = gather_10)[name = string("end_1")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; string cast_11_dtype_0 = const()[name = string("cast_11_dtype_0"), val = string("fp32")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([2])]; tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_5")]; tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; tensor concat_6_values4_0 = const()[name = string("concat_6_values4_0"), val = tensor([0])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_4, expand_dims_5, concat_6_values4_0))[name = string("concat_6")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor cast_11 = cast(dtype = cast_11_dtype_0, x = read_state_0)[name = string("cast_146")]; tensor key_cache_internal_tensor_assign_1 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1, x = cast_11)[name = string("key_cache_internal_tensor_assign_1")]; string cast_12_dtype_0 = const()[name = string("cast_12_dtype_0"), val = string("fp16")]; tensor cast_12 = cast(dtype = cast_12_dtype_0, x = key_cache_internal_tensor_assign_1)[name = string("cast_145")]; write_state(data = cast_12, input = key_cache)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; string key_cache_internal_tensor_assign_1_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_1_dtype_0"), val = string("fp32")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; string cast_13_dtype_0 = const()[name = string("cast_13_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_1 = transpose(perm = v_state_1_perm_0, x = var_289)[name = string("transpose_141")]; tensor cast_13 = cast(dtype = cast_13_dtype_0, x = read_state_1)[name = string("cast_144")]; tensor value_cache_internal_tensor_assign_1 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1, x = cast_13)[name = string("value_cache_internal_tensor_assign_1")]; string cast_14_dtype_0 = const()[name = string("cast_14_dtype_0"), val = string("fp16")]; tensor cast_14 = cast(dtype = cast_14_dtype_0, x = value_cache_internal_tensor_assign_1)[name = string("cast_143")]; write_state(data = cast_14, input = value_cache)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; string value_cache_internal_tensor_assign_1_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_1_dtype_0"), val = string("fp32")]; tensor var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_346_end_0 = const()[name = string("op_346_end_0"), val = tensor([1, 1, 2, 2048, 128])]; tensor var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_346_squeeze_mask_0 = const()[name = string("op_346_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_1 = cast(dtype = key_cache_internal_tensor_assign_1_dtype_0, x = coreml_update_state_72)[name = string("cast_142")]; tensor var_346 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, squeeze_mask = var_346_squeeze_mask_0, x = key_cache_internal_tensor_assign_1_1)[name = string("op_346")]; int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(2)]; int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(128)]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; tensor var_349_begin_0 = const()[name = string("op_349_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_349_end_mask_0 = const()[name = string("op_349_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_349 = slice_by_index(begin = var_349_begin_0, end = concat_11, end_mask = var_349_end_mask_0, x = var_346)[name = string("op_349")]; tensor var_351_begin_0 = const()[name = string("op_351_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_351_end_0 = const()[name = string("op_351_end_0"), val = tensor([1, 1, 2, 2048, 128])]; tensor var_351_end_mask_0 = const()[name = string("op_351_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_351_squeeze_mask_0 = const()[name = string("op_351_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_1_1 = cast(dtype = value_cache_internal_tensor_assign_1_dtype_0, x = coreml_update_state_73)[name = string("cast_141")]; tensor var_351 = slice_by_index(begin = var_351_begin_0, end = var_351_end_0, end_mask = var_351_end_mask_0, squeeze_mask = var_351_squeeze_mask_0, x = value_cache_internal_tensor_assign_1_1)[name = string("op_351")]; tensor var_354_begin_0 = const()[name = string("op_354_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_354_end_mask_0 = const()[name = string("op_354_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_354 = slice_by_index(begin = var_354_begin_0, end = concat_11, end_mask = var_354_end_mask_0, x = var_351)[name = string("op_354")]; tensor var_356_shape = shape(x = var_349)[name = string("op_356_shape")]; int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; int32 gather_14 = const()[name = string("gather_14"), val = int32(2)]; int32 select_15 = const()[name = string("select_15"), val = int32(2)]; int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; int32 gather_15 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15, validate_indices = gather_15_validate_indices_0, x = var_356_shape)[name = string("gather_15")]; int32 gather_16 = const()[name = string("gather_16"), val = int32(128)]; tensor var_363_axes_0 = const()[name = string("op_363_axes_0"), val = tensor([2])]; tensor var_363 = expand_dims(axes = var_363_axes_0, x = var_349)[name = string("op_363")]; tensor shape_17 = shape(x = var_363)[name = string("shape_17")]; int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_100, gather_15, gather_16))[name = string("concat_13")]; tensor real_div_0 = real_div(x = concat_13, y = shape_17)[name = string("real_div_0")]; tensor hidden_states_11 = tile(reps = real_div_0, x = var_363)[name = string("hidden_states_11")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 16, -1, 128])]; tensor key_states_3 = reshape(shape = concat_14x, x = hidden_states_11)[name = string("key_states_3")]; tensor var_373_shape = shape(x = var_354)[name = string("op_373_shape")]; int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; int32 gather_18 = const()[name = string("gather_18"), val = int32(2)]; int32 select_19 = const()[name = string("select_19"), val = int32(2)]; int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; int32 gather_19 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19, validate_indices = gather_19_validate_indices_0, x = var_373_shape)[name = string("gather_19")]; int32 gather_20 = const()[name = string("gather_20"), val = int32(128)]; tensor var_380_axes_0 = const()[name = string("op_380_axes_0"), val = tensor([2])]; tensor var_380 = expand_dims(axes = var_380_axes_0, x = var_354)[name = string("op_380")]; tensor shape_22 = shape(x = var_380)[name = string("shape_22")]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_100, gather_19, gather_20))[name = string("concat_15")]; tensor real_div_1 = real_div(x = concat_15, y = shape_22)[name = string("real_div_1")]; tensor hidden_states_15 = tile(reps = real_div_1, x = var_380)[name = string("hidden_states_15")]; tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 16, -1, 128])]; tensor value_states_3 = reshape(shape = concat_16x, x = hidden_states_15)[name = string("value_states_3")]; tensor var_390_shape = shape(x = key_states_3)[name = string("op_390_shape")]; int32 select_21 = const()[name = string("select_21"), val = int32(2)]; int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; int32 gather_21 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21, validate_indices = gather_21_validate_indices_0, x = var_390_shape)[name = string("gather_21")]; int32 concat_17_values0_0 = const()[name = string("concat_17_values0_0"), val = int32(1)]; int32 concat_17_values1_0 = const()[name = string("concat_17_values1_0"), val = int32(1)]; int32 concat_17_values2_0 = const()[name = string("concat_17_values2_0"), val = int32(0)]; int32 concat_17_axis_0 = const()[name = string("concat_17_axis_0"), val = int32(0)]; bool concat_17_interleave_0 = const()[name = string("concat_17_interleave_0"), val = bool(false)]; tensor concat_17 = concat(axis = concat_17_axis_0, interleave = concat_17_interleave_0, values = (concat_17_values0_0, concat_17_values1_0, concat_17_values2_0, gather_21))[name = string("concat_17")]; tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_3 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_17, end_mask = causal_mask_3_end_mask_0, x = cast_0)[name = string("causal_mask_3")]; tensor attn_output_1 = scaled_dot_product_attention(attn_mask = causal_mask_3, key = key_states_3, query = query_states_3, value = value_states_3)[name = string("attn_output_1")]; tensor var_396_perm_0 = const()[name = string("op_396_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_18x = const()[name = string("concat_18x"), val = tensor([1, -1, 2048])]; tensor var_396 = transpose(perm = var_396_perm_0, x = attn_output_1)[name = string("transpose_140")]; tensor input_1 = reshape(shape = concat_18x, x = var_396)[name = string("input_1")]; tensor linear_3_bias_0 = const()[name = string("linear_3_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1929571648)))]; tensor linear_3 = linear(bias = linear_3_bias_0, weight = model_model_layers_0_self_attn_o_proj_weight_quantized, x = input_1)[name = string("linear_3")]; tensor hidden_states_19 = add(x = inputs_embeds, y = linear_3)[name = string("hidden_states_19")]; fp32 var_94_promoted_1 = const()[name = string("op_94_promoted_1"), val = fp32(0x1p+1)]; tensor var_405 = pow(x = hidden_states_19, y = var_94_promoted_1)[name = string("op_405")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_405)[name = string("variance_3")]; fp32 var_408 = const()[name = string("op_408"), val = fp32(0x1.0c6f7ap-20)]; tensor var_409 = add(x = variance_3, y = var_408)[name = string("op_409")]; fp32 var_410_epsilon_0 = const()[name = string("op_410_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_410 = rsqrt(epsilon = var_410_epsilon_0, x = var_409)[name = string("op_410")]; tensor hidden_states_23 = mul(x = hidden_states_19, y = var_410)[name = string("hidden_states_23")]; tensor input_3 = mul(x = model_model_layers_0_post_attention_layernorm_weight, y = hidden_states_23)[name = string("input_3")]; tensor linear_4_bias_0 = const()[name = string("linear_4_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1929579904)))]; tensor linear_4 = linear(bias = linear_4_bias_0, weight = model_model_layers_0_mlp_gate_proj_weight_quantized, x = input_3)[name = string("linear_4")]; tensor var_422 = silu(x = linear_4)[name = string("op_422")]; tensor linear_5 = linear(bias = linear_4_bias_0, weight = model_model_layers_0_mlp_up_proj_weight_quantized, x = input_3)[name = string("linear_5")]; tensor input_7 = mul(x = var_422, y = linear_5)[name = string("input_7")]; tensor linear_6 = linear(bias = linear_3_bias_0, weight = model_model_layers_0_mlp_down_proj_weight_quantized, x = input_7)[name = string("linear_6")]; tensor hidden_states_29 = add(x = hidden_states_19, y = linear_6)[name = string("hidden_states_29")]; fp32 var_94_promoted_2 = const()[name = string("op_94_promoted_2"), val = fp32(0x1p+1)]; tensor var_435 = pow(x = hidden_states_29, y = var_94_promoted_2)[name = string("op_435")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_435)[name = string("variance_5")]; fp32 var_438 = const()[name = string("op_438"), val = fp32(0x1.0c6f7ap-20)]; tensor var_439 = add(x = variance_5, y = var_438)[name = string("op_439")]; fp32 var_440_epsilon_0 = const()[name = string("op_440_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_440 = rsqrt(epsilon = var_440_epsilon_0, x = var_439)[name = string("op_440")]; tensor hidden_states_33 = mul(x = hidden_states_29, y = var_440)[name = string("hidden_states_33")]; tensor hidden_states_37 = mul(x = model_model_layers_1_input_layernorm_weight, y = hidden_states_33)[name = string("hidden_states_37")]; tensor linear_7 = linear(bias = model_model_layers_1_self_attn_q_proj_bias, weight = model_model_layers_1_self_attn_q_proj_weight_quantized, x = hidden_states_37)[name = string("linear_7")]; tensor linear_8 = linear(bias = model_model_layers_1_self_attn_k_proj_bias, weight = model_model_layers_1_self_attn_k_proj_weight_quantized, x = hidden_states_37)[name = string("linear_8")]; tensor linear_9 = linear(bias = model_model_layers_1_self_attn_v_proj_bias, weight = model_model_layers_1_self_attn_v_proj_weight_quantized, x = hidden_states_37)[name = string("linear_9")]; tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 16, 128])]; tensor var_463 = reshape(shape = concat_19x, x = linear_7)[name = string("op_463")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 2, 128])]; tensor var_466 = reshape(shape = concat_20x, x = linear_8)[name = string("op_466")]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 2, 128])]; tensor var_469 = reshape(shape = concat_21x, x = linear_9)[name = string("op_469")]; tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_3 = transpose(perm = q_3_perm_0, x = var_463)[name = string("transpose_139")]; tensor var_473 = mul(x = q_3, y = cos_7)[name = string("op_473")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3)[name = string("x2_5")]; fp32 const_5_promoted = const()[name = string("const_5_promoted"), val = fp32(-0x1p+0)]; tensor var_484 = mul(x = x2_5, y = const_5_promoted)[name = string("op_484")]; bool var_486_interleave_0 = const()[name = string("op_486_interleave_0"), val = bool(false)]; tensor var_486 = concat(axis = var_88, interleave = var_486_interleave_0, values = (var_484, x1_5))[name = string("op_486")]; tensor var_487 = mul(x = var_486, y = sin_7)[name = string("op_487")]; tensor query_states_7 = add(x = var_473, y = var_487)[name = string("query_states_7")]; tensor k_3 = transpose(perm = k_3_perm_0, x = var_466)[name = string("transpose_138")]; tensor var_489 = mul(x = k_3, y = cos_7)[name = string("op_489")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3)[name = string("x2_7")]; fp32 const_6_promoted = const()[name = string("const_6_promoted"), val = fp32(-0x1p+0)]; tensor var_500 = mul(x = x2_7, y = const_6_promoted)[name = string("op_500")]; bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; tensor var_502 = concat(axis = var_88, interleave = var_502_interleave_0, values = (var_500, x1_7))[name = string("op_502")]; tensor var_503 = mul(x = var_502, y = sin_7)[name = string("op_503")]; tensor k_state_3 = add(x = var_489, y = var_503)[name = string("k_state_3")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([1])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_24")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3, x = key_cache_internal_tensor_assign_1_1)[name = string("key_cache_internal_tensor_assign_2")]; string cast_27_dtype_0 = const()[name = string("cast_27_dtype_0"), val = string("fp16")]; tensor cast_27 = cast(dtype = cast_27_dtype_0, x = key_cache_internal_tensor_assign_2)[name = string("cast_140")]; write_state(data = cast_27, input = key_cache)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; string key_cache_internal_tensor_assign_2_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_2_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_3 = transpose(perm = v_state_3_perm_0, x = var_469)[name = string("transpose_137")]; tensor value_cache_internal_tensor_assign_2 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3, x = value_cache_internal_tensor_assign_1_1)[name = string("value_cache_internal_tensor_assign_2")]; string cast_28_dtype_0 = const()[name = string("cast_28_dtype_0"), val = string("fp16")]; tensor cast_28 = cast(dtype = cast_28_dtype_0, x = value_cache_internal_tensor_assign_2)[name = string("cast_139")]; write_state(data = cast_28, input = value_cache)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; string value_cache_internal_tensor_assign_2_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_2_dtype_0"), val = string("fp32")]; tensor var_526_begin_0 = const()[name = string("op_526_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_526_end_0 = const()[name = string("op_526_end_0"), val = tensor([2, 1, 2, 2048, 128])]; tensor var_526_end_mask_0 = const()[name = string("op_526_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_526_squeeze_mask_0 = const()[name = string("op_526_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_1 = cast(dtype = key_cache_internal_tensor_assign_2_dtype_0, x = coreml_update_state_74)[name = string("cast_138")]; tensor var_526 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, squeeze_mask = var_526_squeeze_mask_0, x = key_cache_internal_tensor_assign_2_1)[name = string("op_526")]; tensor var_529_begin_0 = const()[name = string("op_529_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_529_end_mask_0 = const()[name = string("op_529_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_529 = slice_by_index(begin = var_529_begin_0, end = concat_11, end_mask = var_529_end_mask_0, x = var_526)[name = string("op_529")]; tensor var_531_begin_0 = const()[name = string("op_531_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_531_end_0 = const()[name = string("op_531_end_0"), val = tensor([2, 1, 2, 2048, 128])]; tensor var_531_end_mask_0 = const()[name = string("op_531_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_531_squeeze_mask_0 = const()[name = string("op_531_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_2_1 = cast(dtype = value_cache_internal_tensor_assign_2_dtype_0, x = coreml_update_state_75)[name = string("cast_137")]; tensor var_531 = slice_by_index(begin = var_531_begin_0, end = var_531_end_0, end_mask = var_531_end_mask_0, squeeze_mask = var_531_squeeze_mask_0, x = value_cache_internal_tensor_assign_2_1)[name = string("op_531")]; tensor var_534_begin_0 = const()[name = string("op_534_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_534_end_mask_0 = const()[name = string("op_534_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_534 = slice_by_index(begin = var_534_begin_0, end = concat_11, end_mask = var_534_end_mask_0, x = var_531)[name = string("op_534")]; tensor var_536_shape = shape(x = var_529)[name = string("op_536_shape")]; int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; int32 gather_32 = const()[name = string("gather_32"), val = int32(2)]; int32 select_33 = const()[name = string("select_33"), val = int32(2)]; int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; int32 gather_33 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33, validate_indices = gather_33_validate_indices_0, x = var_536_shape)[name = string("gather_33")]; int32 gather_34 = const()[name = string("gather_34"), val = int32(128)]; tensor var_543_axes_0 = const()[name = string("op_543_axes_0"), val = tensor([2])]; tensor var_543 = expand_dims(axes = var_543_axes_0, x = var_529)[name = string("op_543")]; tensor shape_37 = shape(x = var_543)[name = string("shape_37")]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_100, gather_33, gather_34))[name = string("concat_32")]; tensor real_div_2 = real_div(x = concat_32, y = shape_37)[name = string("real_div_2")]; tensor hidden_states_41 = tile(reps = real_div_2, x = var_543)[name = string("hidden_states_41")]; tensor concat_33x = const()[name = string("concat_33x"), val = tensor([1, 16, -1, 128])]; tensor key_states_7 = reshape(shape = concat_33x, x = hidden_states_41)[name = string("key_states_7")]; tensor var_553_shape = shape(x = var_534)[name = string("op_553_shape")]; int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; int32 gather_36 = const()[name = string("gather_36"), val = int32(2)]; int32 select_37 = const()[name = string("select_37"), val = int32(2)]; int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; int32 gather_37 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37, validate_indices = gather_37_validate_indices_0, x = var_553_shape)[name = string("gather_37")]; int32 gather_38 = const()[name = string("gather_38"), val = int32(128)]; tensor var_560_axes_0 = const()[name = string("op_560_axes_0"), val = tensor([2])]; tensor var_560 = expand_dims(axes = var_560_axes_0, x = var_534)[name = string("op_560")]; tensor shape_42 = shape(x = var_560)[name = string("shape_42")]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_100, gather_37, gather_38))[name = string("concat_34")]; tensor real_div_3 = real_div(x = concat_34, y = shape_42)[name = string("real_div_3")]; tensor hidden_states_45 = tile(reps = real_div_3, x = var_560)[name = string("hidden_states_45")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, 16, -1, 128])]; tensor value_states_7 = reshape(shape = concat_35x, x = hidden_states_45)[name = string("value_states_7")]; tensor var_570_shape = shape(x = key_states_7)[name = string("op_570_shape")]; int32 select_39 = const()[name = string("select_39"), val = int32(2)]; int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; int32 gather_39 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39, validate_indices = gather_39_validate_indices_0, x = var_570_shape)[name = string("gather_39")]; int32 concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = int32(1)]; int32 concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = int32(1)]; int32 concat_36_values2_0 = const()[name = string("concat_36_values2_0"), val = int32(0)]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, concat_36_values2_0, gather_39))[name = string("concat_36")]; tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_5 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_36, end_mask = causal_mask_5_end_mask_0, x = cast_0)[name = string("causal_mask_5")]; tensor attn_output_5 = scaled_dot_product_attention(attn_mask = causal_mask_5, key = key_states_7, query = query_states_7, value = value_states_7)[name = string("attn_output_5")]; tensor var_576_perm_0 = const()[name = string("op_576_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_37x = const()[name = string("concat_37x"), val = tensor([1, -1, 2048])]; tensor var_576 = transpose(perm = var_576_perm_0, x = attn_output_5)[name = string("transpose_136")]; tensor input_9 = reshape(shape = concat_37x, x = var_576)[name = string("input_9")]; tensor linear_10 = linear(bias = linear_3_bias_0, weight = model_model_layers_1_self_attn_o_proj_weight_quantized, x = input_9)[name = string("linear_10")]; tensor hidden_states_49 = add(x = hidden_states_29, y = linear_10)[name = string("hidden_states_49")]; fp32 var_94_promoted_3 = const()[name = string("op_94_promoted_3"), val = fp32(0x1p+1)]; tensor var_585 = pow(x = hidden_states_49, y = var_94_promoted_3)[name = string("op_585")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_585)[name = string("variance_7")]; fp32 var_588 = const()[name = string("op_588"), val = fp32(0x1.0c6f7ap-20)]; tensor var_589 = add(x = variance_7, y = var_588)[name = string("op_589")]; fp32 var_590_epsilon_0 = const()[name = string("op_590_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_590 = rsqrt(epsilon = var_590_epsilon_0, x = var_589)[name = string("op_590")]; tensor hidden_states_53 = mul(x = hidden_states_49, y = var_590)[name = string("hidden_states_53")]; tensor input_11 = mul(x = model_model_layers_1_post_attention_layernorm_weight, y = hidden_states_53)[name = string("input_11")]; tensor linear_11 = linear(bias = linear_4_bias_0, weight = model_model_layers_1_mlp_gate_proj_weight_quantized, x = input_11)[name = string("linear_11")]; tensor var_602 = silu(x = linear_11)[name = string("op_602")]; tensor linear_12 = linear(bias = linear_4_bias_0, weight = model_model_layers_1_mlp_up_proj_weight_quantized, x = input_11)[name = string("linear_12")]; tensor input_15 = mul(x = var_602, y = linear_12)[name = string("input_15")]; tensor linear_13 = linear(bias = linear_3_bias_0, weight = model_model_layers_1_mlp_down_proj_weight_quantized, x = input_15)[name = string("linear_13")]; tensor hidden_states_59 = add(x = hidden_states_49, y = linear_13)[name = string("hidden_states_59")]; fp32 var_94_promoted_4 = const()[name = string("op_94_promoted_4"), val = fp32(0x1p+1)]; tensor var_615 = pow(x = hidden_states_59, y = var_94_promoted_4)[name = string("op_615")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_615)[name = string("variance_9")]; fp32 var_618 = const()[name = string("op_618"), val = fp32(0x1.0c6f7ap-20)]; tensor var_619 = add(x = variance_9, y = var_618)[name = string("op_619")]; fp32 var_620_epsilon_0 = const()[name = string("op_620_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_620 = rsqrt(epsilon = var_620_epsilon_0, x = var_619)[name = string("op_620")]; tensor hidden_states_63 = mul(x = hidden_states_59, y = var_620)[name = string("hidden_states_63")]; tensor hidden_states_67 = mul(x = model_model_layers_2_input_layernorm_weight, y = hidden_states_63)[name = string("hidden_states_67")]; tensor linear_14 = linear(bias = model_model_layers_2_self_attn_q_proj_bias, weight = model_model_layers_2_self_attn_q_proj_weight_quantized, x = hidden_states_67)[name = string("linear_14")]; tensor linear_15 = linear(bias = model_model_layers_2_self_attn_k_proj_bias, weight = model_model_layers_2_self_attn_k_proj_weight_quantized, x = hidden_states_67)[name = string("linear_15")]; tensor linear_16 = linear(bias = model_model_layers_2_self_attn_v_proj_bias, weight = model_model_layers_2_self_attn_v_proj_weight_quantized, x = hidden_states_67)[name = string("linear_16")]; tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, -1, 16, 128])]; tensor var_643 = reshape(shape = concat_38x, x = linear_14)[name = string("op_643")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 2, 128])]; tensor var_646 = reshape(shape = concat_39x, x = linear_15)[name = string("op_646")]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 2, 128])]; tensor var_649 = reshape(shape = concat_40x, x = linear_16)[name = string("op_649")]; tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_5 = transpose(perm = q_5_perm_0, x = var_643)[name = string("transpose_135")]; tensor var_653 = mul(x = q_5, y = cos_7)[name = string("op_653")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5)[name = string("x2_9")]; fp32 const_7_promoted = const()[name = string("const_7_promoted"), val = fp32(-0x1p+0)]; tensor var_664 = mul(x = x2_9, y = const_7_promoted)[name = string("op_664")]; bool var_666_interleave_0 = const()[name = string("op_666_interleave_0"), val = bool(false)]; tensor var_666 = concat(axis = var_88, interleave = var_666_interleave_0, values = (var_664, x1_9))[name = string("op_666")]; tensor var_667 = mul(x = var_666, y = sin_7)[name = string("op_667")]; tensor query_states_11 = add(x = var_653, y = var_667)[name = string("query_states_11")]; tensor k_5 = transpose(perm = k_5_perm_0, x = var_646)[name = string("transpose_134")]; tensor var_669 = mul(x = k_5, y = cos_7)[name = string("op_669")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5)[name = string("x2_11")]; fp32 const_8_promoted = const()[name = string("const_8_promoted"), val = fp32(-0x1p+0)]; tensor var_680 = mul(x = x2_11, y = const_8_promoted)[name = string("op_680")]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682 = concat(axis = var_88, interleave = var_682_interleave_0, values = (var_680, x1_11))[name = string("op_682")]; tensor var_683 = mul(x = var_682, y = sin_7)[name = string("op_683")]; tensor k_state_5 = add(x = var_669, y = var_683)[name = string("k_state_5")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor concat_43_values0_0 = const()[name = string("concat_43_values0_0"), val = tensor([2])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (concat_43_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_43")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5, x = key_cache_internal_tensor_assign_2_1)[name = string("key_cache_internal_tensor_assign_3")]; string cast_41_dtype_0 = const()[name = string("cast_41_dtype_0"), val = string("fp16")]; tensor cast_41 = cast(dtype = cast_41_dtype_0, x = key_cache_internal_tensor_assign_3)[name = string("cast_136")]; write_state(data = cast_41, input = key_cache)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; string key_cache_internal_tensor_assign_3_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_3_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_5 = transpose(perm = v_state_5_perm_0, x = var_649)[name = string("transpose_133")]; tensor value_cache_internal_tensor_assign_3 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5, x = value_cache_internal_tensor_assign_2_1)[name = string("value_cache_internal_tensor_assign_3")]; string cast_42_dtype_0 = const()[name = string("cast_42_dtype_0"), val = string("fp16")]; tensor cast_42 = cast(dtype = cast_42_dtype_0, x = value_cache_internal_tensor_assign_3)[name = string("cast_135")]; write_state(data = cast_42, input = value_cache)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; string value_cache_internal_tensor_assign_3_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_3_dtype_0"), val = string("fp32")]; tensor var_706_begin_0 = const()[name = string("op_706_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_706_end_0 = const()[name = string("op_706_end_0"), val = tensor([3, 1, 2, 2048, 128])]; tensor var_706_end_mask_0 = const()[name = string("op_706_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_706_squeeze_mask_0 = const()[name = string("op_706_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_1 = cast(dtype = key_cache_internal_tensor_assign_3_dtype_0, x = coreml_update_state_76)[name = string("cast_134")]; tensor var_706 = slice_by_index(begin = var_706_begin_0, end = var_706_end_0, end_mask = var_706_end_mask_0, squeeze_mask = var_706_squeeze_mask_0, x = key_cache_internal_tensor_assign_3_1)[name = string("op_706")]; tensor var_709_begin_0 = const()[name = string("op_709_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_709_end_mask_0 = const()[name = string("op_709_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_709 = slice_by_index(begin = var_709_begin_0, end = concat_11, end_mask = var_709_end_mask_0, x = var_706)[name = string("op_709")]; tensor var_711_begin_0 = const()[name = string("op_711_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_711_end_0 = const()[name = string("op_711_end_0"), val = tensor([3, 1, 2, 2048, 128])]; tensor var_711_end_mask_0 = const()[name = string("op_711_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_711_squeeze_mask_0 = const()[name = string("op_711_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_3_1 = cast(dtype = value_cache_internal_tensor_assign_3_dtype_0, x = coreml_update_state_77)[name = string("cast_133")]; tensor var_711 = slice_by_index(begin = var_711_begin_0, end = var_711_end_0, end_mask = var_711_end_mask_0, squeeze_mask = var_711_squeeze_mask_0, x = value_cache_internal_tensor_assign_3_1)[name = string("op_711")]; tensor var_714_begin_0 = const()[name = string("op_714_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_714_end_mask_0 = const()[name = string("op_714_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_714 = slice_by_index(begin = var_714_begin_0, end = concat_11, end_mask = var_714_end_mask_0, x = var_711)[name = string("op_714")]; tensor var_716_shape = shape(x = var_709)[name = string("op_716_shape")]; int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; int32 gather_50 = const()[name = string("gather_50"), val = int32(2)]; int32 select_51 = const()[name = string("select_51"), val = int32(2)]; int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; int32 gather_51 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51, validate_indices = gather_51_validate_indices_0, x = var_716_shape)[name = string("gather_51")]; int32 gather_52 = const()[name = string("gather_52"), val = int32(128)]; tensor var_723_axes_0 = const()[name = string("op_723_axes_0"), val = tensor([2])]; tensor var_723 = expand_dims(axes = var_723_axes_0, x = var_709)[name = string("op_723")]; tensor shape_57 = shape(x = var_723)[name = string("shape_57")]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_100, gather_51, gather_52))[name = string("concat_51")]; tensor real_div_4 = real_div(x = concat_51, y = shape_57)[name = string("real_div_4")]; tensor hidden_states_71 = tile(reps = real_div_4, x = var_723)[name = string("hidden_states_71")]; tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, 16, -1, 128])]; tensor key_states_11 = reshape(shape = concat_52x, x = hidden_states_71)[name = string("key_states_11")]; tensor var_733_shape = shape(x = var_714)[name = string("op_733_shape")]; int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; int32 gather_54 = const()[name = string("gather_54"), val = int32(2)]; int32 select_55 = const()[name = string("select_55"), val = int32(2)]; int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; int32 gather_55 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55, validate_indices = gather_55_validate_indices_0, x = var_733_shape)[name = string("gather_55")]; int32 gather_56 = const()[name = string("gather_56"), val = int32(128)]; tensor var_740_axes_0 = const()[name = string("op_740_axes_0"), val = tensor([2])]; tensor var_740 = expand_dims(axes = var_740_axes_0, x = var_714)[name = string("op_740")]; tensor shape_62 = shape(x = var_740)[name = string("shape_62")]; int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_100, gather_55, gather_56))[name = string("concat_53")]; tensor real_div_5 = real_div(x = concat_53, y = shape_62)[name = string("real_div_5")]; tensor hidden_states_75 = tile(reps = real_div_5, x = var_740)[name = string("hidden_states_75")]; tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 16, -1, 128])]; tensor value_states_11 = reshape(shape = concat_54x, x = hidden_states_75)[name = string("value_states_11")]; tensor var_750_shape = shape(x = key_states_11)[name = string("op_750_shape")]; int32 select_57 = const()[name = string("select_57"), val = int32(2)]; int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; int32 gather_57 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57, validate_indices = gather_57_validate_indices_0, x = var_750_shape)[name = string("gather_57")]; int32 concat_55_values0_0 = const()[name = string("concat_55_values0_0"), val = int32(1)]; int32 concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = int32(1)]; int32 concat_55_values2_0 = const()[name = string("concat_55_values2_0"), val = int32(0)]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (concat_55_values0_0, concat_55_values1_0, concat_55_values2_0, gather_57))[name = string("concat_55")]; tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_7 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_55, end_mask = causal_mask_7_end_mask_0, x = cast_0)[name = string("causal_mask_7")]; tensor attn_output_9 = scaled_dot_product_attention(attn_mask = causal_mask_7, key = key_states_11, query = query_states_11, value = value_states_11)[name = string("attn_output_9")]; tensor var_756_perm_0 = const()[name = string("op_756_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 2048])]; tensor var_756 = transpose(perm = var_756_perm_0, x = attn_output_9)[name = string("transpose_132")]; tensor input_17 = reshape(shape = concat_56x, x = var_756)[name = string("input_17")]; tensor linear_17 = linear(bias = linear_3_bias_0, weight = model_model_layers_2_self_attn_o_proj_weight_quantized, x = input_17)[name = string("linear_17")]; tensor hidden_states_79 = add(x = hidden_states_59, y = linear_17)[name = string("hidden_states_79")]; fp32 var_94_promoted_5 = const()[name = string("op_94_promoted_5"), val = fp32(0x1p+1)]; tensor var_765 = pow(x = hidden_states_79, y = var_94_promoted_5)[name = string("op_765")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_765)[name = string("variance_11")]; fp32 var_768 = const()[name = string("op_768"), val = fp32(0x1.0c6f7ap-20)]; tensor var_769 = add(x = variance_11, y = var_768)[name = string("op_769")]; fp32 var_770_epsilon_0 = const()[name = string("op_770_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_770 = rsqrt(epsilon = var_770_epsilon_0, x = var_769)[name = string("op_770")]; tensor hidden_states_83 = mul(x = hidden_states_79, y = var_770)[name = string("hidden_states_83")]; tensor input_19 = mul(x = model_model_layers_2_post_attention_layernorm_weight, y = hidden_states_83)[name = string("input_19")]; tensor linear_18 = linear(bias = linear_4_bias_0, weight = model_model_layers_2_mlp_gate_proj_weight_quantized, x = input_19)[name = string("linear_18")]; tensor var_782 = silu(x = linear_18)[name = string("op_782")]; tensor linear_19 = linear(bias = linear_4_bias_0, weight = model_model_layers_2_mlp_up_proj_weight_quantized, x = input_19)[name = string("linear_19")]; tensor input_23 = mul(x = var_782, y = linear_19)[name = string("input_23")]; tensor linear_20 = linear(bias = linear_3_bias_0, weight = model_model_layers_2_mlp_down_proj_weight_quantized, x = input_23)[name = string("linear_20")]; tensor hidden_states_89 = add(x = hidden_states_79, y = linear_20)[name = string("hidden_states_89")]; fp32 var_94_promoted_6 = const()[name = string("op_94_promoted_6"), val = fp32(0x1p+1)]; tensor var_795 = pow(x = hidden_states_89, y = var_94_promoted_6)[name = string("op_795")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_795)[name = string("variance_13")]; fp32 var_798 = const()[name = string("op_798"), val = fp32(0x1.0c6f7ap-20)]; tensor var_799 = add(x = variance_13, y = var_798)[name = string("op_799")]; fp32 var_800_epsilon_0 = const()[name = string("op_800_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_800 = rsqrt(epsilon = var_800_epsilon_0, x = var_799)[name = string("op_800")]; tensor hidden_states_93 = mul(x = hidden_states_89, y = var_800)[name = string("hidden_states_93")]; tensor hidden_states_97 = mul(x = model_model_layers_3_input_layernorm_weight, y = hidden_states_93)[name = string("hidden_states_97")]; tensor linear_21 = linear(bias = model_model_layers_3_self_attn_q_proj_bias, weight = model_model_layers_3_self_attn_q_proj_weight_quantized, x = hidden_states_97)[name = string("linear_21")]; tensor linear_22 = linear(bias = model_model_layers_3_self_attn_k_proj_bias, weight = model_model_layers_3_self_attn_k_proj_weight_quantized, x = hidden_states_97)[name = string("linear_22")]; tensor linear_23 = linear(bias = model_model_layers_3_self_attn_v_proj_bias, weight = model_model_layers_3_self_attn_v_proj_weight_quantized, x = hidden_states_97)[name = string("linear_23")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 16, 128])]; tensor var_823 = reshape(shape = concat_57x, x = linear_21)[name = string("op_823")]; tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 2, 128])]; tensor var_826 = reshape(shape = concat_58x, x = linear_22)[name = string("op_826")]; tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 2, 128])]; tensor var_829 = reshape(shape = concat_59x, x = linear_23)[name = string("op_829")]; tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_7 = transpose(perm = q_7_perm_0, x = var_823)[name = string("transpose_131")]; tensor var_833 = mul(x = q_7, y = cos_7)[name = string("op_833")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7)[name = string("x2_13")]; fp32 const_9_promoted = const()[name = string("const_9_promoted"), val = fp32(-0x1p+0)]; tensor var_844 = mul(x = x2_13, y = const_9_promoted)[name = string("op_844")]; bool var_846_interleave_0 = const()[name = string("op_846_interleave_0"), val = bool(false)]; tensor var_846 = concat(axis = var_88, interleave = var_846_interleave_0, values = (var_844, x1_13))[name = string("op_846")]; tensor var_847 = mul(x = var_846, y = sin_7)[name = string("op_847")]; tensor query_states_15 = add(x = var_833, y = var_847)[name = string("query_states_15")]; tensor k_7 = transpose(perm = k_7_perm_0, x = var_826)[name = string("transpose_130")]; tensor var_849 = mul(x = k_7, y = cos_7)[name = string("op_849")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7)[name = string("x2_15")]; fp32 const_10_promoted = const()[name = string("const_10_promoted"), val = fp32(-0x1p+0)]; tensor var_860 = mul(x = x2_15, y = const_10_promoted)[name = string("op_860")]; bool var_862_interleave_0 = const()[name = string("op_862_interleave_0"), val = bool(false)]; tensor var_862 = concat(axis = var_88, interleave = var_862_interleave_0, values = (var_860, x1_15))[name = string("op_862")]; tensor var_863 = mul(x = var_862, y = sin_7)[name = string("op_863")]; tensor k_state_7 = add(x = var_849, y = var_863)[name = string("k_state_7")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor concat_62_values0_0 = const()[name = string("concat_62_values0_0"), val = tensor([3])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (concat_62_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_62")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7, x = key_cache_internal_tensor_assign_3_1)[name = string("key_cache_internal_tensor_assign_4")]; string cast_55_dtype_0 = const()[name = string("cast_55_dtype_0"), val = string("fp16")]; tensor cast_55 = cast(dtype = cast_55_dtype_0, x = key_cache_internal_tensor_assign_4)[name = string("cast_132")]; write_state(data = cast_55, input = key_cache)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; string key_cache_internal_tensor_assign_4_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_4_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_7 = transpose(perm = v_state_7_perm_0, x = var_829)[name = string("transpose_129")]; tensor value_cache_internal_tensor_assign_4 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7, x = value_cache_internal_tensor_assign_3_1)[name = string("value_cache_internal_tensor_assign_4")]; string cast_56_dtype_0 = const()[name = string("cast_56_dtype_0"), val = string("fp16")]; tensor cast_56 = cast(dtype = cast_56_dtype_0, x = value_cache_internal_tensor_assign_4)[name = string("cast_131")]; write_state(data = cast_56, input = value_cache)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; string value_cache_internal_tensor_assign_4_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_4_dtype_0"), val = string("fp32")]; tensor var_886_begin_0 = const()[name = string("op_886_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_886_end_0 = const()[name = string("op_886_end_0"), val = tensor([4, 1, 2, 2048, 128])]; tensor var_886_end_mask_0 = const()[name = string("op_886_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_886_squeeze_mask_0 = const()[name = string("op_886_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_1 = cast(dtype = key_cache_internal_tensor_assign_4_dtype_0, x = coreml_update_state_78)[name = string("cast_130")]; tensor var_886 = slice_by_index(begin = var_886_begin_0, end = var_886_end_0, end_mask = var_886_end_mask_0, squeeze_mask = var_886_squeeze_mask_0, x = key_cache_internal_tensor_assign_4_1)[name = string("op_886")]; tensor var_889_begin_0 = const()[name = string("op_889_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_889_end_mask_0 = const()[name = string("op_889_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_889 = slice_by_index(begin = var_889_begin_0, end = concat_11, end_mask = var_889_end_mask_0, x = var_886)[name = string("op_889")]; tensor var_891_begin_0 = const()[name = string("op_891_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_891_end_0 = const()[name = string("op_891_end_0"), val = tensor([4, 1, 2, 2048, 128])]; tensor var_891_end_mask_0 = const()[name = string("op_891_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_891_squeeze_mask_0 = const()[name = string("op_891_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_4_1 = cast(dtype = value_cache_internal_tensor_assign_4_dtype_0, x = coreml_update_state_79)[name = string("cast_129")]; tensor var_891 = slice_by_index(begin = var_891_begin_0, end = var_891_end_0, end_mask = var_891_end_mask_0, squeeze_mask = var_891_squeeze_mask_0, x = value_cache_internal_tensor_assign_4_1)[name = string("op_891")]; tensor var_894_begin_0 = const()[name = string("op_894_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_894_end_mask_0 = const()[name = string("op_894_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_894 = slice_by_index(begin = var_894_begin_0, end = concat_11, end_mask = var_894_end_mask_0, x = var_891)[name = string("op_894")]; tensor var_896_shape = shape(x = var_889)[name = string("op_896_shape")]; int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; int32 gather_68 = const()[name = string("gather_68"), val = int32(2)]; int32 select_69 = const()[name = string("select_69"), val = int32(2)]; int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; int32 gather_69 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69, validate_indices = gather_69_validate_indices_0, x = var_896_shape)[name = string("gather_69")]; int32 gather_70 = const()[name = string("gather_70"), val = int32(128)]; tensor var_903_axes_0 = const()[name = string("op_903_axes_0"), val = tensor([2])]; tensor var_903 = expand_dims(axes = var_903_axes_0, x = var_889)[name = string("op_903")]; tensor shape_77 = shape(x = var_903)[name = string("shape_77")]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_100, gather_69, gather_70))[name = string("concat_70")]; tensor real_div_6 = real_div(x = concat_70, y = shape_77)[name = string("real_div_6")]; tensor hidden_states_101 = tile(reps = real_div_6, x = var_903)[name = string("hidden_states_101")]; tensor concat_71x = const()[name = string("concat_71x"), val = tensor([1, 16, -1, 128])]; tensor key_states_15 = reshape(shape = concat_71x, x = hidden_states_101)[name = string("key_states_15")]; tensor var_913_shape = shape(x = var_894)[name = string("op_913_shape")]; int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; int32 gather_72 = const()[name = string("gather_72"), val = int32(2)]; int32 select_73 = const()[name = string("select_73"), val = int32(2)]; int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; int32 gather_73 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73, validate_indices = gather_73_validate_indices_0, x = var_913_shape)[name = string("gather_73")]; int32 gather_74 = const()[name = string("gather_74"), val = int32(128)]; tensor var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor([2])]; tensor var_920 = expand_dims(axes = var_920_axes_0, x = var_894)[name = string("op_920")]; tensor shape_82 = shape(x = var_920)[name = string("shape_82")]; int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_100, gather_73, gather_74))[name = string("concat_72")]; tensor real_div_7 = real_div(x = concat_72, y = shape_82)[name = string("real_div_7")]; tensor hidden_states_105 = tile(reps = real_div_7, x = var_920)[name = string("hidden_states_105")]; tensor concat_73x = const()[name = string("concat_73x"), val = tensor([1, 16, -1, 128])]; tensor value_states_15 = reshape(shape = concat_73x, x = hidden_states_105)[name = string("value_states_15")]; tensor var_930_shape = shape(x = key_states_15)[name = string("op_930_shape")]; int32 select_75 = const()[name = string("select_75"), val = int32(2)]; int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; int32 gather_75 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75, validate_indices = gather_75_validate_indices_0, x = var_930_shape)[name = string("gather_75")]; int32 concat_74_values0_0 = const()[name = string("concat_74_values0_0"), val = int32(1)]; int32 concat_74_values1_0 = const()[name = string("concat_74_values1_0"), val = int32(1)]; int32 concat_74_values2_0 = const()[name = string("concat_74_values2_0"), val = int32(0)]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (concat_74_values0_0, concat_74_values1_0, concat_74_values2_0, gather_75))[name = string("concat_74")]; tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_9 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_74, end_mask = causal_mask_9_end_mask_0, x = cast_0)[name = string("causal_mask_9")]; tensor attn_output_13 = scaled_dot_product_attention(attn_mask = causal_mask_9, key = key_states_15, query = query_states_15, value = value_states_15)[name = string("attn_output_13")]; tensor var_936_perm_0 = const()[name = string("op_936_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_75x = const()[name = string("concat_75x"), val = tensor([1, -1, 2048])]; tensor var_936 = transpose(perm = var_936_perm_0, x = attn_output_13)[name = string("transpose_128")]; tensor input_25 = reshape(shape = concat_75x, x = var_936)[name = string("input_25")]; tensor linear_24 = linear(bias = linear_3_bias_0, weight = model_model_layers_3_self_attn_o_proj_weight_quantized, x = input_25)[name = string("linear_24")]; tensor hidden_states_109 = add(x = hidden_states_89, y = linear_24)[name = string("hidden_states_109")]; fp32 var_94_promoted_7 = const()[name = string("op_94_promoted_7"), val = fp32(0x1p+1)]; tensor var_945 = pow(x = hidden_states_109, y = var_94_promoted_7)[name = string("op_945")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_945)[name = string("variance_15")]; fp32 var_948 = const()[name = string("op_948"), val = fp32(0x1.0c6f7ap-20)]; tensor var_949 = add(x = variance_15, y = var_948)[name = string("op_949")]; fp32 var_950_epsilon_0 = const()[name = string("op_950_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_950 = rsqrt(epsilon = var_950_epsilon_0, x = var_949)[name = string("op_950")]; tensor hidden_states_113 = mul(x = hidden_states_109, y = var_950)[name = string("hidden_states_113")]; tensor input_27 = mul(x = model_model_layers_3_post_attention_layernorm_weight, y = hidden_states_113)[name = string("input_27")]; tensor linear_25 = linear(bias = linear_4_bias_0, weight = model_model_layers_3_mlp_gate_proj_weight_quantized, x = input_27)[name = string("linear_25")]; tensor var_962 = silu(x = linear_25)[name = string("op_962")]; tensor linear_26 = linear(bias = linear_4_bias_0, weight = model_model_layers_3_mlp_up_proj_weight_quantized, x = input_27)[name = string("linear_26")]; tensor input_31 = mul(x = var_962, y = linear_26)[name = string("input_31")]; tensor linear_27 = linear(bias = linear_3_bias_0, weight = model_model_layers_3_mlp_down_proj_weight_quantized, x = input_31)[name = string("linear_27")]; tensor hidden_states_119 = add(x = hidden_states_109, y = linear_27)[name = string("hidden_states_119")]; fp32 var_94_promoted_8 = const()[name = string("op_94_promoted_8"), val = fp32(0x1p+1)]; tensor var_975 = pow(x = hidden_states_119, y = var_94_promoted_8)[name = string("op_975")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_975)[name = string("variance_17")]; fp32 var_978 = const()[name = string("op_978"), val = fp32(0x1.0c6f7ap-20)]; tensor var_979 = add(x = variance_17, y = var_978)[name = string("op_979")]; fp32 var_980_epsilon_0 = const()[name = string("op_980_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_980 = rsqrt(epsilon = var_980_epsilon_0, x = var_979)[name = string("op_980")]; tensor hidden_states_123 = mul(x = hidden_states_119, y = var_980)[name = string("hidden_states_123")]; tensor hidden_states_127 = mul(x = model_model_layers_4_input_layernorm_weight, y = hidden_states_123)[name = string("hidden_states_127")]; tensor linear_28 = linear(bias = model_model_layers_4_self_attn_q_proj_bias, weight = model_model_layers_4_self_attn_q_proj_weight_quantized, x = hidden_states_127)[name = string("linear_28")]; tensor linear_29 = linear(bias = model_model_layers_4_self_attn_k_proj_bias, weight = model_model_layers_4_self_attn_k_proj_weight_quantized, x = hidden_states_127)[name = string("linear_29")]; tensor linear_30 = linear(bias = model_model_layers_4_self_attn_v_proj_bias, weight = model_model_layers_4_self_attn_v_proj_weight_quantized, x = hidden_states_127)[name = string("linear_30")]; tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 16, 128])]; tensor var_1003 = reshape(shape = concat_76x, x = linear_28)[name = string("op_1003")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 2, 128])]; tensor var_1006 = reshape(shape = concat_77x, x = linear_29)[name = string("op_1006")]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 2, 128])]; tensor var_1009 = reshape(shape = concat_78x, x = linear_30)[name = string("op_1009")]; tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_9 = transpose(perm = q_9_perm_0, x = var_1003)[name = string("transpose_127")]; tensor var_1013 = mul(x = q_9, y = cos_7)[name = string("op_1013")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9)[name = string("x2_17")]; fp32 const_11_promoted = const()[name = string("const_11_promoted"), val = fp32(-0x1p+0)]; tensor var_1024 = mul(x = x2_17, y = const_11_promoted)[name = string("op_1024")]; bool var_1026_interleave_0 = const()[name = string("op_1026_interleave_0"), val = bool(false)]; tensor var_1026 = concat(axis = var_88, interleave = var_1026_interleave_0, values = (var_1024, x1_17))[name = string("op_1026")]; tensor var_1027 = mul(x = var_1026, y = sin_7)[name = string("op_1027")]; tensor query_states_19 = add(x = var_1013, y = var_1027)[name = string("query_states_19")]; tensor k_9 = transpose(perm = k_9_perm_0, x = var_1006)[name = string("transpose_126")]; tensor var_1029 = mul(x = k_9, y = cos_7)[name = string("op_1029")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9)[name = string("x2_19")]; fp32 const_12_promoted = const()[name = string("const_12_promoted"), val = fp32(-0x1p+0)]; tensor var_1040 = mul(x = x2_19, y = const_12_promoted)[name = string("op_1040")]; bool var_1042_interleave_0 = const()[name = string("op_1042_interleave_0"), val = bool(false)]; tensor var_1042 = concat(axis = var_88, interleave = var_1042_interleave_0, values = (var_1040, x1_19))[name = string("op_1042")]; tensor var_1043 = mul(x = var_1042, y = sin_7)[name = string("op_1043")]; tensor k_state_9 = add(x = var_1029, y = var_1043)[name = string("k_state_9")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([4])]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_81")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9, x = key_cache_internal_tensor_assign_4_1)[name = string("key_cache_internal_tensor_assign_5")]; string cast_69_dtype_0 = const()[name = string("cast_69_dtype_0"), val = string("fp16")]; tensor cast_69 = cast(dtype = cast_69_dtype_0, x = key_cache_internal_tensor_assign_5)[name = string("cast_128")]; write_state(data = cast_69, input = key_cache)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; string key_cache_internal_tensor_assign_5_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_5_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_9 = transpose(perm = v_state_9_perm_0, x = var_1009)[name = string("transpose_125")]; tensor value_cache_internal_tensor_assign_5 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9, x = value_cache_internal_tensor_assign_4_1)[name = string("value_cache_internal_tensor_assign_5")]; string cast_70_dtype_0 = const()[name = string("cast_70_dtype_0"), val = string("fp16")]; tensor cast_70 = cast(dtype = cast_70_dtype_0, x = value_cache_internal_tensor_assign_5)[name = string("cast_127")]; write_state(data = cast_70, input = value_cache)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; string value_cache_internal_tensor_assign_5_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_5_dtype_0"), val = string("fp32")]; tensor var_1066_begin_0 = const()[name = string("op_1066_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1066_end_0 = const()[name = string("op_1066_end_0"), val = tensor([5, 1, 2, 2048, 128])]; tensor var_1066_end_mask_0 = const()[name = string("op_1066_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1066_squeeze_mask_0 = const()[name = string("op_1066_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_1 = cast(dtype = key_cache_internal_tensor_assign_5_dtype_0, x = coreml_update_state_80)[name = string("cast_126")]; tensor var_1066 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, squeeze_mask = var_1066_squeeze_mask_0, x = key_cache_internal_tensor_assign_5_1)[name = string("op_1066")]; tensor var_1069_begin_0 = const()[name = string("op_1069_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1069_end_mask_0 = const()[name = string("op_1069_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1069 = slice_by_index(begin = var_1069_begin_0, end = concat_11, end_mask = var_1069_end_mask_0, x = var_1066)[name = string("op_1069")]; tensor var_1071_begin_0 = const()[name = string("op_1071_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1071_end_0 = const()[name = string("op_1071_end_0"), val = tensor([5, 1, 2, 2048, 128])]; tensor var_1071_end_mask_0 = const()[name = string("op_1071_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1071_squeeze_mask_0 = const()[name = string("op_1071_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_5_1 = cast(dtype = value_cache_internal_tensor_assign_5_dtype_0, x = coreml_update_state_81)[name = string("cast_125")]; tensor var_1071 = slice_by_index(begin = var_1071_begin_0, end = var_1071_end_0, end_mask = var_1071_end_mask_0, squeeze_mask = var_1071_squeeze_mask_0, x = value_cache_internal_tensor_assign_5_1)[name = string("op_1071")]; tensor var_1074_begin_0 = const()[name = string("op_1074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1074_end_mask_0 = const()[name = string("op_1074_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1074 = slice_by_index(begin = var_1074_begin_0, end = concat_11, end_mask = var_1074_end_mask_0, x = var_1071)[name = string("op_1074")]; tensor var_1076_shape = shape(x = var_1069)[name = string("op_1076_shape")]; int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; int32 gather_86 = const()[name = string("gather_86"), val = int32(2)]; int32 select_87 = const()[name = string("select_87"), val = int32(2)]; int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; int32 gather_87 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87, validate_indices = gather_87_validate_indices_0, x = var_1076_shape)[name = string("gather_87")]; int32 gather_88 = const()[name = string("gather_88"), val = int32(128)]; tensor var_1083_axes_0 = const()[name = string("op_1083_axes_0"), val = tensor([2])]; tensor var_1083 = expand_dims(axes = var_1083_axes_0, x = var_1069)[name = string("op_1083")]; tensor shape_97 = shape(x = var_1083)[name = string("shape_97")]; int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_100, gather_87, gather_88))[name = string("concat_89")]; tensor real_div_8 = real_div(x = concat_89, y = shape_97)[name = string("real_div_8")]; tensor hidden_states_131 = tile(reps = real_div_8, x = var_1083)[name = string("hidden_states_131")]; tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, 16, -1, 128])]; tensor key_states_19 = reshape(shape = concat_90x, x = hidden_states_131)[name = string("key_states_19")]; tensor var_1093_shape = shape(x = var_1074)[name = string("op_1093_shape")]; int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; int32 gather_90 = const()[name = string("gather_90"), val = int32(2)]; int32 select_91 = const()[name = string("select_91"), val = int32(2)]; int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; int32 gather_91 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91, validate_indices = gather_91_validate_indices_0, x = var_1093_shape)[name = string("gather_91")]; int32 gather_92 = const()[name = string("gather_92"), val = int32(128)]; tensor var_1100_axes_0 = const()[name = string("op_1100_axes_0"), val = tensor([2])]; tensor var_1100 = expand_dims(axes = var_1100_axes_0, x = var_1074)[name = string("op_1100")]; tensor shape_102 = shape(x = var_1100)[name = string("shape_102")]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_100, gather_91, gather_92))[name = string("concat_91")]; tensor real_div_9 = real_div(x = concat_91, y = shape_102)[name = string("real_div_9")]; tensor hidden_states_135 = tile(reps = real_div_9, x = var_1100)[name = string("hidden_states_135")]; tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, 16, -1, 128])]; tensor value_states_19 = reshape(shape = concat_92x, x = hidden_states_135)[name = string("value_states_19")]; tensor var_1110_shape = shape(x = key_states_19)[name = string("op_1110_shape")]; int32 select_93 = const()[name = string("select_93"), val = int32(2)]; int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; int32 gather_93 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93, validate_indices = gather_93_validate_indices_0, x = var_1110_shape)[name = string("gather_93")]; int32 concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = int32(1)]; int32 concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = int32(1)]; int32 concat_93_values2_0 = const()[name = string("concat_93_values2_0"), val = int32(0)]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, concat_93_values2_0, gather_93))[name = string("concat_93")]; tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_11 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_93, end_mask = causal_mask_11_end_mask_0, x = cast_0)[name = string("causal_mask_11")]; tensor attn_output_17 = scaled_dot_product_attention(attn_mask = causal_mask_11, key = key_states_19, query = query_states_19, value = value_states_19)[name = string("attn_output_17")]; tensor var_1116_perm_0 = const()[name = string("op_1116_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_94x = const()[name = string("concat_94x"), val = tensor([1, -1, 2048])]; tensor var_1116 = transpose(perm = var_1116_perm_0, x = attn_output_17)[name = string("transpose_124")]; tensor input_33 = reshape(shape = concat_94x, x = var_1116)[name = string("input_33")]; tensor linear_31 = linear(bias = linear_3_bias_0, weight = model_model_layers_4_self_attn_o_proj_weight_quantized, x = input_33)[name = string("linear_31")]; tensor hidden_states_139 = add(x = hidden_states_119, y = linear_31)[name = string("hidden_states_139")]; fp32 var_94_promoted_9 = const()[name = string("op_94_promoted_9"), val = fp32(0x1p+1)]; tensor var_1125 = pow(x = hidden_states_139, y = var_94_promoted_9)[name = string("op_1125")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1125)[name = string("variance_19")]; fp32 var_1128 = const()[name = string("op_1128"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1129 = add(x = variance_19, y = var_1128)[name = string("op_1129")]; fp32 var_1130_epsilon_0 = const()[name = string("op_1130_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1130 = rsqrt(epsilon = var_1130_epsilon_0, x = var_1129)[name = string("op_1130")]; tensor hidden_states_143 = mul(x = hidden_states_139, y = var_1130)[name = string("hidden_states_143")]; tensor input_35 = mul(x = model_model_layers_4_post_attention_layernorm_weight, y = hidden_states_143)[name = string("input_35")]; tensor linear_32 = linear(bias = linear_4_bias_0, weight = model_model_layers_4_mlp_gate_proj_weight_quantized, x = input_35)[name = string("linear_32")]; tensor var_1142 = silu(x = linear_32)[name = string("op_1142")]; tensor linear_33 = linear(bias = linear_4_bias_0, weight = model_model_layers_4_mlp_up_proj_weight_quantized, x = input_35)[name = string("linear_33")]; tensor input_39 = mul(x = var_1142, y = linear_33)[name = string("input_39")]; tensor linear_34 = linear(bias = linear_3_bias_0, weight = model_model_layers_4_mlp_down_proj_weight_quantized, x = input_39)[name = string("linear_34")]; tensor hidden_states_149 = add(x = hidden_states_139, y = linear_34)[name = string("hidden_states_149")]; fp32 var_94_promoted_10 = const()[name = string("op_94_promoted_10"), val = fp32(0x1p+1)]; tensor var_1155 = pow(x = hidden_states_149, y = var_94_promoted_10)[name = string("op_1155")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1155)[name = string("variance_21")]; fp32 var_1158 = const()[name = string("op_1158"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1159 = add(x = variance_21, y = var_1158)[name = string("op_1159")]; fp32 var_1160_epsilon_0 = const()[name = string("op_1160_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1160 = rsqrt(epsilon = var_1160_epsilon_0, x = var_1159)[name = string("op_1160")]; tensor hidden_states_153 = mul(x = hidden_states_149, y = var_1160)[name = string("hidden_states_153")]; tensor hidden_states_157 = mul(x = model_model_layers_5_input_layernorm_weight, y = hidden_states_153)[name = string("hidden_states_157")]; tensor linear_35 = linear(bias = model_model_layers_5_self_attn_q_proj_bias, weight = model_model_layers_5_self_attn_q_proj_weight_quantized, x = hidden_states_157)[name = string("linear_35")]; tensor linear_36 = linear(bias = model_model_layers_5_self_attn_k_proj_bias, weight = model_model_layers_5_self_attn_k_proj_weight_quantized, x = hidden_states_157)[name = string("linear_36")]; tensor linear_37 = linear(bias = model_model_layers_5_self_attn_v_proj_bias, weight = model_model_layers_5_self_attn_v_proj_weight_quantized, x = hidden_states_157)[name = string("linear_37")]; tensor concat_95x = const()[name = string("concat_95x"), val = tensor([1, -1, 16, 128])]; tensor var_1183 = reshape(shape = concat_95x, x = linear_35)[name = string("op_1183")]; tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 2, 128])]; tensor var_1186 = reshape(shape = concat_96x, x = linear_36)[name = string("op_1186")]; tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 2, 128])]; tensor var_1189 = reshape(shape = concat_97x, x = linear_37)[name = string("op_1189")]; tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_11 = transpose(perm = q_11_perm_0, x = var_1183)[name = string("transpose_123")]; tensor var_1193 = mul(x = q_11, y = cos_7)[name = string("op_1193")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11)[name = string("x2_21")]; fp32 const_13_promoted = const()[name = string("const_13_promoted"), val = fp32(-0x1p+0)]; tensor var_1204 = mul(x = x2_21, y = const_13_promoted)[name = string("op_1204")]; bool var_1206_interleave_0 = const()[name = string("op_1206_interleave_0"), val = bool(false)]; tensor var_1206 = concat(axis = var_88, interleave = var_1206_interleave_0, values = (var_1204, x1_21))[name = string("op_1206")]; tensor var_1207 = mul(x = var_1206, y = sin_7)[name = string("op_1207")]; tensor query_states_23 = add(x = var_1193, y = var_1207)[name = string("query_states_23")]; tensor k_11 = transpose(perm = k_11_perm_0, x = var_1186)[name = string("transpose_122")]; tensor var_1209 = mul(x = k_11, y = cos_7)[name = string("op_1209")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11)[name = string("x2_23")]; fp32 const_14_promoted = const()[name = string("const_14_promoted"), val = fp32(-0x1p+0)]; tensor var_1220 = mul(x = x2_23, y = const_14_promoted)[name = string("op_1220")]; bool var_1222_interleave_0 = const()[name = string("op_1222_interleave_0"), val = bool(false)]; tensor var_1222 = concat(axis = var_88, interleave = var_1222_interleave_0, values = (var_1220, x1_23))[name = string("op_1222")]; tensor var_1223 = mul(x = var_1222, y = sin_7)[name = string("op_1223")]; tensor k_state_11 = add(x = var_1209, y = var_1223)[name = string("k_state_11")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor concat_100_values0_0 = const()[name = string("concat_100_values0_0"), val = tensor([5])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (concat_100_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11, x = key_cache_internal_tensor_assign_5_1)[name = string("key_cache_internal_tensor_assign_6")]; string cast_83_dtype_0 = const()[name = string("cast_83_dtype_0"), val = string("fp16")]; tensor cast_83 = cast(dtype = cast_83_dtype_0, x = key_cache_internal_tensor_assign_6)[name = string("cast_124")]; write_state(data = cast_83, input = key_cache)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; string key_cache_internal_tensor_assign_6_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_6_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_11 = transpose(perm = v_state_11_perm_0, x = var_1189)[name = string("transpose_121")]; tensor value_cache_internal_tensor_assign_6 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11, x = value_cache_internal_tensor_assign_5_1)[name = string("value_cache_internal_tensor_assign_6")]; string cast_84_dtype_0 = const()[name = string("cast_84_dtype_0"), val = string("fp16")]; tensor cast_84 = cast(dtype = cast_84_dtype_0, x = value_cache_internal_tensor_assign_6)[name = string("cast_123")]; write_state(data = cast_84, input = value_cache)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; string value_cache_internal_tensor_assign_6_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_6_dtype_0"), val = string("fp32")]; tensor var_1246_begin_0 = const()[name = string("op_1246_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1246_end_0 = const()[name = string("op_1246_end_0"), val = tensor([6, 1, 2, 2048, 128])]; tensor var_1246_end_mask_0 = const()[name = string("op_1246_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1246_squeeze_mask_0 = const()[name = string("op_1246_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_1 = cast(dtype = key_cache_internal_tensor_assign_6_dtype_0, x = coreml_update_state_82)[name = string("cast_122")]; tensor var_1246 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, squeeze_mask = var_1246_squeeze_mask_0, x = key_cache_internal_tensor_assign_6_1)[name = string("op_1246")]; tensor var_1249_begin_0 = const()[name = string("op_1249_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1249_end_mask_0 = const()[name = string("op_1249_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1249 = slice_by_index(begin = var_1249_begin_0, end = concat_11, end_mask = var_1249_end_mask_0, x = var_1246)[name = string("op_1249")]; tensor var_1251_begin_0 = const()[name = string("op_1251_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1251_end_0 = const()[name = string("op_1251_end_0"), val = tensor([6, 1, 2, 2048, 128])]; tensor var_1251_end_mask_0 = const()[name = string("op_1251_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1251_squeeze_mask_0 = const()[name = string("op_1251_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_6_1 = cast(dtype = value_cache_internal_tensor_assign_6_dtype_0, x = coreml_update_state_83)[name = string("cast_121")]; tensor var_1251 = slice_by_index(begin = var_1251_begin_0, end = var_1251_end_0, end_mask = var_1251_end_mask_0, squeeze_mask = var_1251_squeeze_mask_0, x = value_cache_internal_tensor_assign_6_1)[name = string("op_1251")]; tensor var_1254_begin_0 = const()[name = string("op_1254_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1254_end_mask_0 = const()[name = string("op_1254_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1254 = slice_by_index(begin = var_1254_begin_0, end = concat_11, end_mask = var_1254_end_mask_0, x = var_1251)[name = string("op_1254")]; tensor var_1256_shape = shape(x = var_1249)[name = string("op_1256_shape")]; int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; int32 gather_104 = const()[name = string("gather_104"), val = int32(2)]; int32 select_105 = const()[name = string("select_105"), val = int32(2)]; int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; int32 gather_105 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105, validate_indices = gather_105_validate_indices_0, x = var_1256_shape)[name = string("gather_105")]; int32 gather_106 = const()[name = string("gather_106"), val = int32(128)]; tensor var_1263_axes_0 = const()[name = string("op_1263_axes_0"), val = tensor([2])]; tensor var_1263 = expand_dims(axes = var_1263_axes_0, x = var_1249)[name = string("op_1263")]; tensor shape_117 = shape(x = var_1263)[name = string("shape_117")]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_100, gather_105, gather_106))[name = string("concat_108")]; tensor real_div_10 = real_div(x = concat_108, y = shape_117)[name = string("real_div_10")]; tensor hidden_states_161 = tile(reps = real_div_10, x = var_1263)[name = string("hidden_states_161")]; tensor concat_109x = const()[name = string("concat_109x"), val = tensor([1, 16, -1, 128])]; tensor key_states_23 = reshape(shape = concat_109x, x = hidden_states_161)[name = string("key_states_23")]; tensor var_1273_shape = shape(x = var_1254)[name = string("op_1273_shape")]; int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; int32 gather_108 = const()[name = string("gather_108"), val = int32(2)]; int32 select_109 = const()[name = string("select_109"), val = int32(2)]; int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; int32 gather_109 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109, validate_indices = gather_109_validate_indices_0, x = var_1273_shape)[name = string("gather_109")]; int32 gather_110 = const()[name = string("gather_110"), val = int32(128)]; tensor var_1280_axes_0 = const()[name = string("op_1280_axes_0"), val = tensor([2])]; tensor var_1280 = expand_dims(axes = var_1280_axes_0, x = var_1254)[name = string("op_1280")]; tensor shape_122 = shape(x = var_1280)[name = string("shape_122")]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_100, gather_109, gather_110))[name = string("concat_110")]; tensor real_div_11 = real_div(x = concat_110, y = shape_122)[name = string("real_div_11")]; tensor hidden_states_165 = tile(reps = real_div_11, x = var_1280)[name = string("hidden_states_165")]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, 16, -1, 128])]; tensor value_states_23 = reshape(shape = concat_111x, x = hidden_states_165)[name = string("value_states_23")]; tensor var_1290_shape = shape(x = key_states_23)[name = string("op_1290_shape")]; int32 select_111 = const()[name = string("select_111"), val = int32(2)]; int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; int32 gather_111 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111, validate_indices = gather_111_validate_indices_0, x = var_1290_shape)[name = string("gather_111")]; int32 concat_112_values0_0 = const()[name = string("concat_112_values0_0"), val = int32(1)]; int32 concat_112_values1_0 = const()[name = string("concat_112_values1_0"), val = int32(1)]; int32 concat_112_values2_0 = const()[name = string("concat_112_values2_0"), val = int32(0)]; int32 concat_112_axis_0 = const()[name = string("concat_112_axis_0"), val = int32(0)]; bool concat_112_interleave_0 = const()[name = string("concat_112_interleave_0"), val = bool(false)]; tensor concat_112 = concat(axis = concat_112_axis_0, interleave = concat_112_interleave_0, values = (concat_112_values0_0, concat_112_values1_0, concat_112_values2_0, gather_111))[name = string("concat_112")]; tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_13 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_112, end_mask = causal_mask_13_end_mask_0, x = cast_0)[name = string("causal_mask_13")]; tensor attn_output_21 = scaled_dot_product_attention(attn_mask = causal_mask_13, key = key_states_23, query = query_states_23, value = value_states_23)[name = string("attn_output_21")]; tensor var_1296_perm_0 = const()[name = string("op_1296_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_113x = const()[name = string("concat_113x"), val = tensor([1, -1, 2048])]; tensor var_1296 = transpose(perm = var_1296_perm_0, x = attn_output_21)[name = string("transpose_120")]; tensor input_41 = reshape(shape = concat_113x, x = var_1296)[name = string("input_41")]; tensor linear_38 = linear(bias = linear_3_bias_0, weight = model_model_layers_5_self_attn_o_proj_weight_quantized, x = input_41)[name = string("linear_38")]; tensor hidden_states_169 = add(x = hidden_states_149, y = linear_38)[name = string("hidden_states_169")]; fp32 var_94_promoted_11 = const()[name = string("op_94_promoted_11"), val = fp32(0x1p+1)]; tensor var_1305 = pow(x = hidden_states_169, y = var_94_promoted_11)[name = string("op_1305")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1305)[name = string("variance_23")]; fp32 var_1308 = const()[name = string("op_1308"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1309 = add(x = variance_23, y = var_1308)[name = string("op_1309")]; fp32 var_1310_epsilon_0 = const()[name = string("op_1310_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1310 = rsqrt(epsilon = var_1310_epsilon_0, x = var_1309)[name = string("op_1310")]; tensor hidden_states_173 = mul(x = hidden_states_169, y = var_1310)[name = string("hidden_states_173")]; tensor input_43 = mul(x = model_model_layers_5_post_attention_layernorm_weight, y = hidden_states_173)[name = string("input_43")]; tensor linear_39 = linear(bias = linear_4_bias_0, weight = model_model_layers_5_mlp_gate_proj_weight_quantized, x = input_43)[name = string("linear_39")]; tensor var_1322 = silu(x = linear_39)[name = string("op_1322")]; tensor linear_40 = linear(bias = linear_4_bias_0, weight = model_model_layers_5_mlp_up_proj_weight_quantized, x = input_43)[name = string("linear_40")]; tensor input_47 = mul(x = var_1322, y = linear_40)[name = string("input_47")]; tensor linear_41 = linear(bias = linear_3_bias_0, weight = model_model_layers_5_mlp_down_proj_weight_quantized, x = input_47)[name = string("linear_41")]; tensor hidden_states_179 = add(x = hidden_states_169, y = linear_41)[name = string("hidden_states_179")]; fp32 var_94_promoted_12 = const()[name = string("op_94_promoted_12"), val = fp32(0x1p+1)]; tensor var_1335 = pow(x = hidden_states_179, y = var_94_promoted_12)[name = string("op_1335")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1335)[name = string("variance_25")]; fp32 var_1338 = const()[name = string("op_1338"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1339 = add(x = variance_25, y = var_1338)[name = string("op_1339")]; fp32 var_1340_epsilon_0 = const()[name = string("op_1340_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1340 = rsqrt(epsilon = var_1340_epsilon_0, x = var_1339)[name = string("op_1340")]; tensor hidden_states_183 = mul(x = hidden_states_179, y = var_1340)[name = string("hidden_states_183")]; tensor hidden_states_187 = mul(x = model_model_layers_6_input_layernorm_weight, y = hidden_states_183)[name = string("hidden_states_187")]; tensor linear_42 = linear(bias = model_model_layers_6_self_attn_q_proj_bias, weight = model_model_layers_6_self_attn_q_proj_weight_quantized, x = hidden_states_187)[name = string("linear_42")]; tensor linear_43 = linear(bias = model_model_layers_6_self_attn_k_proj_bias, weight = model_model_layers_6_self_attn_k_proj_weight_quantized, x = hidden_states_187)[name = string("linear_43")]; tensor linear_44 = linear(bias = model_model_layers_6_self_attn_v_proj_bias, weight = model_model_layers_6_self_attn_v_proj_weight_quantized, x = hidden_states_187)[name = string("linear_44")]; tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 16, 128])]; tensor var_1363 = reshape(shape = concat_114x, x = linear_42)[name = string("op_1363")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 2, 128])]; tensor var_1366 = reshape(shape = concat_115x, x = linear_43)[name = string("op_1366")]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 2, 128])]; tensor var_1369 = reshape(shape = concat_116x, x = linear_44)[name = string("op_1369")]; tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_13 = transpose(perm = q_13_perm_0, x = var_1363)[name = string("transpose_119")]; tensor var_1373 = mul(x = q_13, y = cos_7)[name = string("op_1373")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13)[name = string("x2_25")]; fp32 const_15_promoted = const()[name = string("const_15_promoted"), val = fp32(-0x1p+0)]; tensor var_1384 = mul(x = x2_25, y = const_15_promoted)[name = string("op_1384")]; bool var_1386_interleave_0 = const()[name = string("op_1386_interleave_0"), val = bool(false)]; tensor var_1386 = concat(axis = var_88, interleave = var_1386_interleave_0, values = (var_1384, x1_25))[name = string("op_1386")]; tensor var_1387 = mul(x = var_1386, y = sin_7)[name = string("op_1387")]; tensor query_states_27 = add(x = var_1373, y = var_1387)[name = string("query_states_27")]; tensor k_13 = transpose(perm = k_13_perm_0, x = var_1366)[name = string("transpose_118")]; tensor var_1389 = mul(x = k_13, y = cos_7)[name = string("op_1389")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13)[name = string("x2_27")]; fp32 const_16_promoted = const()[name = string("const_16_promoted"), val = fp32(-0x1p+0)]; tensor var_1400 = mul(x = x2_27, y = const_16_promoted)[name = string("op_1400")]; bool var_1402_interleave_0 = const()[name = string("op_1402_interleave_0"), val = bool(false)]; tensor var_1402 = concat(axis = var_88, interleave = var_1402_interleave_0, values = (var_1400, x1_27))[name = string("op_1402")]; tensor var_1403 = mul(x = var_1402, y = sin_7)[name = string("op_1403")]; tensor k_state_13 = add(x = var_1389, y = var_1403)[name = string("k_state_13")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor concat_119_values0_0 = const()[name = string("concat_119_values0_0"), val = tensor([6])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (concat_119_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_119")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13, x = key_cache_internal_tensor_assign_6_1)[name = string("key_cache_internal_tensor_assign_7")]; string cast_97_dtype_0 = const()[name = string("cast_97_dtype_0"), val = string("fp16")]; tensor cast_97 = cast(dtype = cast_97_dtype_0, x = key_cache_internal_tensor_assign_7)[name = string("cast_120")]; write_state(data = cast_97, input = key_cache)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; string key_cache_internal_tensor_assign_7_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_7_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_13 = transpose(perm = v_state_13_perm_0, x = var_1369)[name = string("transpose_117")]; tensor value_cache_internal_tensor_assign_7 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13, x = value_cache_internal_tensor_assign_6_1)[name = string("value_cache_internal_tensor_assign_7")]; string cast_98_dtype_0 = const()[name = string("cast_98_dtype_0"), val = string("fp16")]; tensor cast_98 = cast(dtype = cast_98_dtype_0, x = value_cache_internal_tensor_assign_7)[name = string("cast_119")]; write_state(data = cast_98, input = value_cache)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; string value_cache_internal_tensor_assign_7_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_7_dtype_0"), val = string("fp32")]; tensor var_1426_begin_0 = const()[name = string("op_1426_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1426_end_0 = const()[name = string("op_1426_end_0"), val = tensor([7, 1, 2, 2048, 128])]; tensor var_1426_end_mask_0 = const()[name = string("op_1426_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1426_squeeze_mask_0 = const()[name = string("op_1426_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_1 = cast(dtype = key_cache_internal_tensor_assign_7_dtype_0, x = coreml_update_state_84)[name = string("cast_118")]; tensor var_1426 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, squeeze_mask = var_1426_squeeze_mask_0, x = key_cache_internal_tensor_assign_7_1)[name = string("op_1426")]; tensor var_1429_begin_0 = const()[name = string("op_1429_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1429_end_mask_0 = const()[name = string("op_1429_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1429 = slice_by_index(begin = var_1429_begin_0, end = concat_11, end_mask = var_1429_end_mask_0, x = var_1426)[name = string("op_1429")]; tensor var_1431_begin_0 = const()[name = string("op_1431_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1431_end_0 = const()[name = string("op_1431_end_0"), val = tensor([7, 1, 2, 2048, 128])]; tensor var_1431_end_mask_0 = const()[name = string("op_1431_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1431_squeeze_mask_0 = const()[name = string("op_1431_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_7_1 = cast(dtype = value_cache_internal_tensor_assign_7_dtype_0, x = coreml_update_state_85)[name = string("cast_117")]; tensor var_1431 = slice_by_index(begin = var_1431_begin_0, end = var_1431_end_0, end_mask = var_1431_end_mask_0, squeeze_mask = var_1431_squeeze_mask_0, x = value_cache_internal_tensor_assign_7_1)[name = string("op_1431")]; tensor var_1434_begin_0 = const()[name = string("op_1434_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1434_end_mask_0 = const()[name = string("op_1434_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1434 = slice_by_index(begin = var_1434_begin_0, end = concat_11, end_mask = var_1434_end_mask_0, x = var_1431)[name = string("op_1434")]; tensor var_1436_shape = shape(x = var_1429)[name = string("op_1436_shape")]; int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; int32 gather_122 = const()[name = string("gather_122"), val = int32(2)]; int32 select_123 = const()[name = string("select_123"), val = int32(2)]; int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; int32 gather_123 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123, validate_indices = gather_123_validate_indices_0, x = var_1436_shape)[name = string("gather_123")]; int32 gather_124 = const()[name = string("gather_124"), val = int32(128)]; tensor var_1443_axes_0 = const()[name = string("op_1443_axes_0"), val = tensor([2])]; tensor var_1443 = expand_dims(axes = var_1443_axes_0, x = var_1429)[name = string("op_1443")]; tensor shape_137 = shape(x = var_1443)[name = string("shape_137")]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_100, gather_123, gather_124))[name = string("concat_127")]; tensor real_div_12 = real_div(x = concat_127, y = shape_137)[name = string("real_div_12")]; tensor hidden_states_191 = tile(reps = real_div_12, x = var_1443)[name = string("hidden_states_191")]; tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 16, -1, 128])]; tensor key_states_27 = reshape(shape = concat_128x, x = hidden_states_191)[name = string("key_states_27")]; tensor var_1453_shape = shape(x = var_1434)[name = string("op_1453_shape")]; int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; int32 gather_126 = const()[name = string("gather_126"), val = int32(2)]; int32 select_127 = const()[name = string("select_127"), val = int32(2)]; int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; int32 gather_127 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127, validate_indices = gather_127_validate_indices_0, x = var_1453_shape)[name = string("gather_127")]; int32 gather_128 = const()[name = string("gather_128"), val = int32(128)]; tensor var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor([2])]; tensor var_1460 = expand_dims(axes = var_1460_axes_0, x = var_1434)[name = string("op_1460")]; tensor shape_142 = shape(x = var_1460)[name = string("shape_142")]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_100, gather_127, gather_128))[name = string("concat_129")]; tensor real_div_13 = real_div(x = concat_129, y = shape_142)[name = string("real_div_13")]; tensor hidden_states_195 = tile(reps = real_div_13, x = var_1460)[name = string("hidden_states_195")]; tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, 16, -1, 128])]; tensor value_states_27 = reshape(shape = concat_130x, x = hidden_states_195)[name = string("value_states_27")]; tensor var_1470_shape = shape(x = key_states_27)[name = string("op_1470_shape")]; int32 select_129 = const()[name = string("select_129"), val = int32(2)]; int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; int32 gather_129 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129, validate_indices = gather_129_validate_indices_0, x = var_1470_shape)[name = string("gather_129")]; int32 concat_131_values0_0 = const()[name = string("concat_131_values0_0"), val = int32(1)]; int32 concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = int32(1)]; int32 concat_131_values2_0 = const()[name = string("concat_131_values2_0"), val = int32(0)]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (concat_131_values0_0, concat_131_values1_0, concat_131_values2_0, gather_129))[name = string("concat_131")]; tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_15 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_131, end_mask = causal_mask_15_end_mask_0, x = cast_0)[name = string("causal_mask_15")]; tensor attn_output_25 = scaled_dot_product_attention(attn_mask = causal_mask_15, key = key_states_27, query = query_states_27, value = value_states_27)[name = string("attn_output_25")]; tensor var_1476_perm_0 = const()[name = string("op_1476_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 2048])]; tensor var_1476 = transpose(perm = var_1476_perm_0, x = attn_output_25)[name = string("transpose_116")]; tensor input_49 = reshape(shape = concat_132x, x = var_1476)[name = string("input_49")]; tensor linear_45 = linear(bias = linear_3_bias_0, weight = model_model_layers_6_self_attn_o_proj_weight_quantized, x = input_49)[name = string("linear_45")]; tensor hidden_states_199 = add(x = hidden_states_179, y = linear_45)[name = string("hidden_states_199")]; fp32 var_94_promoted_13 = const()[name = string("op_94_promoted_13"), val = fp32(0x1p+1)]; tensor var_1485 = pow(x = hidden_states_199, y = var_94_promoted_13)[name = string("op_1485")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1485)[name = string("variance_27")]; fp32 var_1488 = const()[name = string("op_1488"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1489 = add(x = variance_27, y = var_1488)[name = string("op_1489")]; fp32 var_1490_epsilon_0 = const()[name = string("op_1490_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1490 = rsqrt(epsilon = var_1490_epsilon_0, x = var_1489)[name = string("op_1490")]; tensor hidden_states_203 = mul(x = hidden_states_199, y = var_1490)[name = string("hidden_states_203")]; tensor input_51 = mul(x = model_model_layers_6_post_attention_layernorm_weight, y = hidden_states_203)[name = string("input_51")]; tensor linear_46 = linear(bias = linear_4_bias_0, weight = model_model_layers_6_mlp_gate_proj_weight_quantized, x = input_51)[name = string("linear_46")]; tensor var_1502 = silu(x = linear_46)[name = string("op_1502")]; tensor linear_47 = linear(bias = linear_4_bias_0, weight = model_model_layers_6_mlp_up_proj_weight_quantized, x = input_51)[name = string("linear_47")]; tensor input_55 = mul(x = var_1502, y = linear_47)[name = string("input_55")]; tensor linear_48 = linear(bias = linear_3_bias_0, weight = model_model_layers_6_mlp_down_proj_weight_quantized, x = input_55)[name = string("linear_48")]; tensor hidden_states_209 = add(x = hidden_states_199, y = linear_48)[name = string("hidden_states_209")]; fp32 var_94_promoted_14 = const()[name = string("op_94_promoted_14"), val = fp32(0x1p+1)]; tensor var_1515 = pow(x = hidden_states_209, y = var_94_promoted_14)[name = string("op_1515")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1515)[name = string("variance_29")]; fp32 var_1518 = const()[name = string("op_1518"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1519 = add(x = variance_29, y = var_1518)[name = string("op_1519")]; fp32 var_1520_epsilon_0 = const()[name = string("op_1520_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1520 = rsqrt(epsilon = var_1520_epsilon_0, x = var_1519)[name = string("op_1520")]; tensor hidden_states_213 = mul(x = hidden_states_209, y = var_1520)[name = string("hidden_states_213")]; tensor hidden_states_217 = mul(x = model_model_layers_7_input_layernorm_weight, y = hidden_states_213)[name = string("hidden_states_217")]; tensor linear_49 = linear(bias = model_model_layers_7_self_attn_q_proj_bias, weight = model_model_layers_7_self_attn_q_proj_weight_quantized, x = hidden_states_217)[name = string("linear_49")]; tensor linear_50 = linear(bias = model_model_layers_7_self_attn_k_proj_bias, weight = model_model_layers_7_self_attn_k_proj_weight_quantized, x = hidden_states_217)[name = string("linear_50")]; tensor linear_51 = linear(bias = model_model_layers_7_self_attn_v_proj_bias, weight = model_model_layers_7_self_attn_v_proj_weight_quantized, x = hidden_states_217)[name = string("linear_51")]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 16, 128])]; tensor var_1543 = reshape(shape = concat_133x, x = linear_49)[name = string("op_1543")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 2, 128])]; tensor var_1546 = reshape(shape = concat_134x, x = linear_50)[name = string("op_1546")]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 2, 128])]; tensor var_1549 = reshape(shape = concat_135x, x = linear_51)[name = string("op_1549")]; tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_15 = transpose(perm = q_15_perm_0, x = var_1543)[name = string("transpose_115")]; tensor var_1553 = mul(x = q_15, y = cos_7)[name = string("op_1553")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15)[name = string("x2_29")]; fp32 const_17_promoted = const()[name = string("const_17_promoted"), val = fp32(-0x1p+0)]; tensor var_1564 = mul(x = x2_29, y = const_17_promoted)[name = string("op_1564")]; bool var_1566_interleave_0 = const()[name = string("op_1566_interleave_0"), val = bool(false)]; tensor var_1566 = concat(axis = var_88, interleave = var_1566_interleave_0, values = (var_1564, x1_29))[name = string("op_1566")]; tensor var_1567 = mul(x = var_1566, y = sin_7)[name = string("op_1567")]; tensor query_states_31 = add(x = var_1553, y = var_1567)[name = string("query_states_31")]; tensor k_15 = transpose(perm = k_15_perm_0, x = var_1546)[name = string("transpose_114")]; tensor var_1569 = mul(x = k_15, y = cos_7)[name = string("op_1569")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15)[name = string("x2_31")]; fp32 const_18_promoted = const()[name = string("const_18_promoted"), val = fp32(-0x1p+0)]; tensor var_1580 = mul(x = x2_31, y = const_18_promoted)[name = string("op_1580")]; bool var_1582_interleave_0 = const()[name = string("op_1582_interleave_0"), val = bool(false)]; tensor var_1582 = concat(axis = var_88, interleave = var_1582_interleave_0, values = (var_1580, x1_31))[name = string("op_1582")]; tensor var_1583 = mul(x = var_1582, y = sin_7)[name = string("op_1583")]; tensor k_state_15 = add(x = var_1569, y = var_1583)[name = string("k_state_15")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([7])]; int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_138")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15, x = key_cache_internal_tensor_assign_7_1)[name = string("key_cache_internal_tensor_assign_8")]; string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("fp16")]; tensor cast_111 = cast(dtype = cast_111_dtype_0, x = key_cache_internal_tensor_assign_8)[name = string("cast_116")]; write_state(data = cast_111, input = key_cache)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; string key_cache_internal_tensor_assign_8_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_8_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_15 = transpose(perm = v_state_15_perm_0, x = var_1549)[name = string("transpose_113")]; tensor value_cache_internal_tensor_assign_8 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15, x = value_cache_internal_tensor_assign_7_1)[name = string("value_cache_internal_tensor_assign_8")]; string cast_112_dtype_0 = const()[name = string("cast_112_dtype_0"), val = string("fp16")]; tensor cast_112 = cast(dtype = cast_112_dtype_0, x = value_cache_internal_tensor_assign_8)[name = string("cast_115")]; write_state(data = cast_112, input = value_cache)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; string value_cache_internal_tensor_assign_8_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_8_dtype_0"), val = string("fp32")]; tensor var_1606_begin_0 = const()[name = string("op_1606_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1606_end_0 = const()[name = string("op_1606_end_0"), val = tensor([8, 1, 2, 2048, 128])]; tensor var_1606_end_mask_0 = const()[name = string("op_1606_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1606_squeeze_mask_0 = const()[name = string("op_1606_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_1 = cast(dtype = key_cache_internal_tensor_assign_8_dtype_0, x = coreml_update_state_86)[name = string("cast_114")]; tensor var_1606 = slice_by_index(begin = var_1606_begin_0, end = var_1606_end_0, end_mask = var_1606_end_mask_0, squeeze_mask = var_1606_squeeze_mask_0, x = key_cache_internal_tensor_assign_8_1)[name = string("op_1606")]; tensor var_1609_begin_0 = const()[name = string("op_1609_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1609_end_mask_0 = const()[name = string("op_1609_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1609 = slice_by_index(begin = var_1609_begin_0, end = concat_11, end_mask = var_1609_end_mask_0, x = var_1606)[name = string("op_1609")]; tensor var_1611_begin_0 = const()[name = string("op_1611_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1611_end_0 = const()[name = string("op_1611_end_0"), val = tensor([8, 1, 2, 2048, 128])]; tensor var_1611_end_mask_0 = const()[name = string("op_1611_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1611_squeeze_mask_0 = const()[name = string("op_1611_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_8_1 = cast(dtype = value_cache_internal_tensor_assign_8_dtype_0, x = coreml_update_state_87)[name = string("cast_113")]; tensor var_1611 = slice_by_index(begin = var_1611_begin_0, end = var_1611_end_0, end_mask = var_1611_end_mask_0, squeeze_mask = var_1611_squeeze_mask_0, x = value_cache_internal_tensor_assign_8_1)[name = string("op_1611")]; tensor var_1614_begin_0 = const()[name = string("op_1614_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1614_end_mask_0 = const()[name = string("op_1614_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1614 = slice_by_index(begin = var_1614_begin_0, end = concat_11, end_mask = var_1614_end_mask_0, x = var_1611)[name = string("op_1614")]; tensor var_1616_shape = shape(x = var_1609)[name = string("op_1616_shape")]; int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; int32 gather_140 = const()[name = string("gather_140"), val = int32(2)]; int32 select_141 = const()[name = string("select_141"), val = int32(2)]; int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; int32 gather_141 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141, validate_indices = gather_141_validate_indices_0, x = var_1616_shape)[name = string("gather_141")]; int32 gather_142 = const()[name = string("gather_142"), val = int32(128)]; tensor var_1623_axes_0 = const()[name = string("op_1623_axes_0"), val = tensor([2])]; tensor var_1623 = expand_dims(axes = var_1623_axes_0, x = var_1609)[name = string("op_1623")]; tensor shape_157 = shape(x = var_1623)[name = string("shape_157")]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_100, gather_141, gather_142))[name = string("concat_146")]; tensor real_div_14 = real_div(x = concat_146, y = shape_157)[name = string("real_div_14")]; tensor hidden_states_221 = tile(reps = real_div_14, x = var_1623)[name = string("hidden_states_221")]; tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, 16, -1, 128])]; tensor key_states_31 = reshape(shape = concat_147x, x = hidden_states_221)[name = string("key_states_31")]; tensor var_1633_shape = shape(x = var_1614)[name = string("op_1633_shape")]; int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; int32 gather_144 = const()[name = string("gather_144"), val = int32(2)]; int32 select_145 = const()[name = string("select_145"), val = int32(2)]; int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; int32 gather_145 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145, validate_indices = gather_145_validate_indices_0, x = var_1633_shape)[name = string("gather_145")]; int32 gather_146 = const()[name = string("gather_146"), val = int32(128)]; tensor var_1640_axes_0 = const()[name = string("op_1640_axes_0"), val = tensor([2])]; tensor var_1640 = expand_dims(axes = var_1640_axes_0, x = var_1614)[name = string("op_1640")]; tensor shape_162 = shape(x = var_1640)[name = string("shape_162")]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_100, gather_145, gather_146))[name = string("concat_148")]; tensor real_div_15 = real_div(x = concat_148, y = shape_162)[name = string("real_div_15")]; tensor hidden_states_225 = tile(reps = real_div_15, x = var_1640)[name = string("hidden_states_225")]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, 16, -1, 128])]; tensor value_states_31 = reshape(shape = concat_149x, x = hidden_states_225)[name = string("value_states_31")]; tensor var_1650_shape = shape(x = key_states_31)[name = string("op_1650_shape")]; int32 select_147 = const()[name = string("select_147"), val = int32(2)]; int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; int32 gather_147 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147, validate_indices = gather_147_validate_indices_0, x = var_1650_shape)[name = string("gather_147")]; int32 concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = int32(1)]; int32 concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = int32(1)]; int32 concat_150_values2_0 = const()[name = string("concat_150_values2_0"), val = int32(0)]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, concat_150_values2_0, gather_147))[name = string("concat_150")]; tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_17 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_150, end_mask = causal_mask_17_end_mask_0, x = cast_0)[name = string("causal_mask_17")]; tensor attn_output_29 = scaled_dot_product_attention(attn_mask = causal_mask_17, key = key_states_31, query = query_states_31, value = value_states_31)[name = string("attn_output_29")]; tensor var_1656_perm_0 = const()[name = string("op_1656_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_151x = const()[name = string("concat_151x"), val = tensor([1, -1, 2048])]; tensor var_1656 = transpose(perm = var_1656_perm_0, x = attn_output_29)[name = string("transpose_112")]; tensor input_57 = reshape(shape = concat_151x, x = var_1656)[name = string("input_57")]; tensor linear_52 = linear(bias = linear_3_bias_0, weight = model_model_layers_7_self_attn_o_proj_weight_quantized, x = input_57)[name = string("linear_52")]; tensor hidden_states_229 = add(x = hidden_states_209, y = linear_52)[name = string("hidden_states_229")]; fp32 var_94_promoted_15 = const()[name = string("op_94_promoted_15"), val = fp32(0x1p+1)]; tensor var_1665 = pow(x = hidden_states_229, y = var_94_promoted_15)[name = string("op_1665")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1665)[name = string("variance_31")]; fp32 var_1668 = const()[name = string("op_1668"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1669 = add(x = variance_31, y = var_1668)[name = string("op_1669")]; fp32 var_1670_epsilon_0 = const()[name = string("op_1670_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1670 = rsqrt(epsilon = var_1670_epsilon_0, x = var_1669)[name = string("op_1670")]; tensor hidden_states_233 = mul(x = hidden_states_229, y = var_1670)[name = string("hidden_states_233")]; tensor input_59 = mul(x = model_model_layers_7_post_attention_layernorm_weight, y = hidden_states_233)[name = string("input_59")]; tensor linear_53 = linear(bias = linear_4_bias_0, weight = model_model_layers_7_mlp_gate_proj_weight_quantized, x = input_59)[name = string("linear_53")]; tensor var_1682 = silu(x = linear_53)[name = string("op_1682")]; tensor linear_54 = linear(bias = linear_4_bias_0, weight = model_model_layers_7_mlp_up_proj_weight_quantized, x = input_59)[name = string("linear_54")]; tensor input_63 = mul(x = var_1682, y = linear_54)[name = string("input_63")]; tensor linear_55 = linear(bias = linear_3_bias_0, weight = model_model_layers_7_mlp_down_proj_weight_quantized, x = input_63)[name = string("linear_55")]; tensor hidden_states_239 = add(x = hidden_states_229, y = linear_55)[name = string("hidden_states_239")]; fp32 var_94_promoted_16 = const()[name = string("op_94_promoted_16"), val = fp32(0x1p+1)]; tensor var_1695 = pow(x = hidden_states_239, y = var_94_promoted_16)[name = string("op_1695")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1695)[name = string("variance_33")]; fp32 var_1698 = const()[name = string("op_1698"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1699 = add(x = variance_33, y = var_1698)[name = string("op_1699")]; fp32 var_1700_epsilon_0 = const()[name = string("op_1700_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1700 = rsqrt(epsilon = var_1700_epsilon_0, x = var_1699)[name = string("op_1700")]; tensor hidden_states_243 = mul(x = hidden_states_239, y = var_1700)[name = string("hidden_states_243")]; tensor hidden_states_247 = mul(x = model_model_layers_8_input_layernorm_weight, y = hidden_states_243)[name = string("hidden_states_247")]; tensor linear_56 = linear(bias = model_model_layers_8_self_attn_q_proj_bias, weight = model_model_layers_8_self_attn_q_proj_weight_quantized, x = hidden_states_247)[name = string("linear_56")]; tensor linear_57 = linear(bias = model_model_layers_8_self_attn_k_proj_bias, weight = model_model_layers_8_self_attn_k_proj_weight_quantized, x = hidden_states_247)[name = string("linear_57")]; tensor linear_58 = linear(bias = model_model_layers_8_self_attn_v_proj_bias, weight = model_model_layers_8_self_attn_v_proj_weight_quantized, x = hidden_states_247)[name = string("linear_58")]; tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 16, 128])]; tensor var_1723 = reshape(shape = concat_152x, x = linear_56)[name = string("op_1723")]; tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 2, 128])]; tensor var_1726 = reshape(shape = concat_153x, x = linear_57)[name = string("op_1726")]; tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 2, 128])]; tensor var_1729 = reshape(shape = concat_154x, x = linear_58)[name = string("op_1729")]; tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_17 = transpose(perm = q_17_perm_0, x = var_1723)[name = string("transpose_111")]; tensor var_1733 = mul(x = q_17, y = cos_7)[name = string("op_1733")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17)[name = string("x2_33")]; fp32 const_19_promoted = const()[name = string("const_19_promoted"), val = fp32(-0x1p+0)]; tensor var_1744 = mul(x = x2_33, y = const_19_promoted)[name = string("op_1744")]; bool var_1746_interleave_0 = const()[name = string("op_1746_interleave_0"), val = bool(false)]; tensor var_1746 = concat(axis = var_88, interleave = var_1746_interleave_0, values = (var_1744, x1_33))[name = string("op_1746")]; tensor var_1747 = mul(x = var_1746, y = sin_7)[name = string("op_1747")]; tensor query_states_35 = add(x = var_1733, y = var_1747)[name = string("query_states_35")]; tensor k_17 = transpose(perm = k_17_perm_0, x = var_1726)[name = string("transpose_110")]; tensor var_1749 = mul(x = k_17, y = cos_7)[name = string("op_1749")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17)[name = string("x2_35")]; fp32 const_20_promoted = const()[name = string("const_20_promoted"), val = fp32(-0x1p+0)]; tensor var_1760 = mul(x = x2_35, y = const_20_promoted)[name = string("op_1760")]; bool var_1762_interleave_0 = const()[name = string("op_1762_interleave_0"), val = bool(false)]; tensor var_1762 = concat(axis = var_88, interleave = var_1762_interleave_0, values = (var_1760, x1_35))[name = string("op_1762")]; tensor var_1763 = mul(x = var_1762, y = sin_7)[name = string("op_1763")]; tensor k_state_17 = add(x = var_1749, y = var_1763)[name = string("k_state_17")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor concat_157_values0_0 = const()[name = string("concat_157_values0_0"), val = tensor([8])]; int32 concat_157_axis_0 = const()[name = string("concat_157_axis_0"), val = int32(0)]; bool concat_157_interleave_0 = const()[name = string("concat_157_interleave_0"), val = bool(false)]; tensor concat_157 = concat(axis = concat_157_axis_0, interleave = concat_157_interleave_0, values = (concat_157_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_157")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17, x = key_cache_internal_tensor_assign_8_1)[name = string("key_cache_internal_tensor_assign_9")]; string cast_125_dtype_0 = const()[name = string("cast_125_dtype_0"), val = string("fp16")]; tensor cast_125 = cast(dtype = cast_125_dtype_0, x = key_cache_internal_tensor_assign_9)[name = string("cast_112")]; write_state(data = cast_125, input = key_cache)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; string key_cache_internal_tensor_assign_9_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_9_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_17 = transpose(perm = v_state_17_perm_0, x = var_1729)[name = string("transpose_109")]; tensor value_cache_internal_tensor_assign_9 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17, x = value_cache_internal_tensor_assign_8_1)[name = string("value_cache_internal_tensor_assign_9")]; string cast_126_dtype_0 = const()[name = string("cast_126_dtype_0"), val = string("fp16")]; tensor cast_126 = cast(dtype = cast_126_dtype_0, x = value_cache_internal_tensor_assign_9)[name = string("cast_111")]; write_state(data = cast_126, input = value_cache)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; string value_cache_internal_tensor_assign_9_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_9_dtype_0"), val = string("fp32")]; tensor var_1786_begin_0 = const()[name = string("op_1786_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1786_end_0 = const()[name = string("op_1786_end_0"), val = tensor([9, 1, 2, 2048, 128])]; tensor var_1786_end_mask_0 = const()[name = string("op_1786_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1786_squeeze_mask_0 = const()[name = string("op_1786_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_1 = cast(dtype = key_cache_internal_tensor_assign_9_dtype_0, x = coreml_update_state_88)[name = string("cast_110")]; tensor var_1786 = slice_by_index(begin = var_1786_begin_0, end = var_1786_end_0, end_mask = var_1786_end_mask_0, squeeze_mask = var_1786_squeeze_mask_0, x = key_cache_internal_tensor_assign_9_1)[name = string("op_1786")]; tensor var_1789_begin_0 = const()[name = string("op_1789_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1789_end_mask_0 = const()[name = string("op_1789_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1789 = slice_by_index(begin = var_1789_begin_0, end = concat_11, end_mask = var_1789_end_mask_0, x = var_1786)[name = string("op_1789")]; tensor var_1791_begin_0 = const()[name = string("op_1791_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1791_end_0 = const()[name = string("op_1791_end_0"), val = tensor([9, 1, 2, 2048, 128])]; tensor var_1791_end_mask_0 = const()[name = string("op_1791_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1791_squeeze_mask_0 = const()[name = string("op_1791_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_9_1 = cast(dtype = value_cache_internal_tensor_assign_9_dtype_0, x = coreml_update_state_89)[name = string("cast_109")]; tensor var_1791 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, squeeze_mask = var_1791_squeeze_mask_0, x = value_cache_internal_tensor_assign_9_1)[name = string("op_1791")]; tensor var_1794_begin_0 = const()[name = string("op_1794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1794_end_mask_0 = const()[name = string("op_1794_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1794 = slice_by_index(begin = var_1794_begin_0, end = concat_11, end_mask = var_1794_end_mask_0, x = var_1791)[name = string("op_1794")]; tensor var_1796_shape = shape(x = var_1789)[name = string("op_1796_shape")]; int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; int32 gather_158 = const()[name = string("gather_158"), val = int32(2)]; int32 select_159 = const()[name = string("select_159"), val = int32(2)]; int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; int32 gather_159 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159, validate_indices = gather_159_validate_indices_0, x = var_1796_shape)[name = string("gather_159")]; int32 gather_160 = const()[name = string("gather_160"), val = int32(128)]; tensor var_1803_axes_0 = const()[name = string("op_1803_axes_0"), val = tensor([2])]; tensor var_1803 = expand_dims(axes = var_1803_axes_0, x = var_1789)[name = string("op_1803")]; tensor shape_177 = shape(x = var_1803)[name = string("shape_177")]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_100, gather_159, gather_160))[name = string("concat_165")]; tensor real_div_16 = real_div(x = concat_165, y = shape_177)[name = string("real_div_16")]; tensor hidden_states_251 = tile(reps = real_div_16, x = var_1803)[name = string("hidden_states_251")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 16, -1, 128])]; tensor key_states_35 = reshape(shape = concat_166x, x = hidden_states_251)[name = string("key_states_35")]; tensor var_1813_shape = shape(x = var_1794)[name = string("op_1813_shape")]; int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; int32 gather_162 = const()[name = string("gather_162"), val = int32(2)]; int32 select_163 = const()[name = string("select_163"), val = int32(2)]; int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; int32 gather_163 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163, validate_indices = gather_163_validate_indices_0, x = var_1813_shape)[name = string("gather_163")]; int32 gather_164 = const()[name = string("gather_164"), val = int32(128)]; tensor var_1820_axes_0 = const()[name = string("op_1820_axes_0"), val = tensor([2])]; tensor var_1820 = expand_dims(axes = var_1820_axes_0, x = var_1794)[name = string("op_1820")]; tensor shape_182 = shape(x = var_1820)[name = string("shape_182")]; int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_100, gather_163, gather_164))[name = string("concat_167")]; tensor real_div_17 = real_div(x = concat_167, y = shape_182)[name = string("real_div_17")]; tensor hidden_states_255 = tile(reps = real_div_17, x = var_1820)[name = string("hidden_states_255")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 16, -1, 128])]; tensor value_states_35 = reshape(shape = concat_168x, x = hidden_states_255)[name = string("value_states_35")]; tensor var_1830_shape = shape(x = key_states_35)[name = string("op_1830_shape")]; int32 select_165 = const()[name = string("select_165"), val = int32(2)]; int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; int32 gather_165 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165, validate_indices = gather_165_validate_indices_0, x = var_1830_shape)[name = string("gather_165")]; int32 concat_169_values0_0 = const()[name = string("concat_169_values0_0"), val = int32(1)]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(1)]; int32 concat_169_values2_0 = const()[name = string("concat_169_values2_0"), val = int32(0)]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (concat_169_values0_0, concat_169_values1_0, concat_169_values2_0, gather_165))[name = string("concat_169")]; tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_19 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_169, end_mask = causal_mask_19_end_mask_0, x = cast_0)[name = string("causal_mask_19")]; tensor attn_output_33 = scaled_dot_product_attention(attn_mask = causal_mask_19, key = key_states_35, query = query_states_35, value = value_states_35)[name = string("attn_output_33")]; tensor var_1836_perm_0 = const()[name = string("op_1836_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_170x = const()[name = string("concat_170x"), val = tensor([1, -1, 2048])]; tensor var_1836 = transpose(perm = var_1836_perm_0, x = attn_output_33)[name = string("transpose_108")]; tensor input_65 = reshape(shape = concat_170x, x = var_1836)[name = string("input_65")]; tensor linear_59 = linear(bias = linear_3_bias_0, weight = model_model_layers_8_self_attn_o_proj_weight_quantized, x = input_65)[name = string("linear_59")]; tensor hidden_states_259 = add(x = hidden_states_239, y = linear_59)[name = string("hidden_states_259")]; fp32 var_94_promoted_17 = const()[name = string("op_94_promoted_17"), val = fp32(0x1p+1)]; tensor var_1845 = pow(x = hidden_states_259, y = var_94_promoted_17)[name = string("op_1845")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1845)[name = string("variance_35")]; fp32 var_1848 = const()[name = string("op_1848"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1849 = add(x = variance_35, y = var_1848)[name = string("op_1849")]; fp32 var_1850_epsilon_0 = const()[name = string("op_1850_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1850 = rsqrt(epsilon = var_1850_epsilon_0, x = var_1849)[name = string("op_1850")]; tensor hidden_states_263 = mul(x = hidden_states_259, y = var_1850)[name = string("hidden_states_263")]; tensor input_67 = mul(x = model_model_layers_8_post_attention_layernorm_weight, y = hidden_states_263)[name = string("input_67")]; tensor linear_60 = linear(bias = linear_4_bias_0, weight = model_model_layers_8_mlp_gate_proj_weight_quantized, x = input_67)[name = string("linear_60")]; tensor var_1862 = silu(x = linear_60)[name = string("op_1862")]; tensor linear_61 = linear(bias = linear_4_bias_0, weight = model_model_layers_8_mlp_up_proj_weight_quantized, x = input_67)[name = string("linear_61")]; tensor input_71 = mul(x = var_1862, y = linear_61)[name = string("input_71")]; tensor linear_62 = linear(bias = linear_3_bias_0, weight = model_model_layers_8_mlp_down_proj_weight_quantized, x = input_71)[name = string("linear_62")]; tensor hidden_states_269 = add(x = hidden_states_259, y = linear_62)[name = string("hidden_states_269")]; fp32 var_94_promoted_18 = const()[name = string("op_94_promoted_18"), val = fp32(0x1p+1)]; tensor var_1875 = pow(x = hidden_states_269, y = var_94_promoted_18)[name = string("op_1875")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1875)[name = string("variance_37")]; fp32 var_1878 = const()[name = string("op_1878"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1879 = add(x = variance_37, y = var_1878)[name = string("op_1879")]; fp32 var_1880_epsilon_0 = const()[name = string("op_1880_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1880 = rsqrt(epsilon = var_1880_epsilon_0, x = var_1879)[name = string("op_1880")]; tensor hidden_states_273 = mul(x = hidden_states_269, y = var_1880)[name = string("hidden_states_273")]; tensor hidden_states_277 = mul(x = model_model_layers_9_input_layernorm_weight, y = hidden_states_273)[name = string("hidden_states_277")]; tensor linear_63 = linear(bias = model_model_layers_9_self_attn_q_proj_bias, weight = model_model_layers_9_self_attn_q_proj_weight_quantized, x = hidden_states_277)[name = string("linear_63")]; tensor linear_64 = linear(bias = model_model_layers_9_self_attn_k_proj_bias, weight = model_model_layers_9_self_attn_k_proj_weight_quantized, x = hidden_states_277)[name = string("linear_64")]; tensor linear_65 = linear(bias = model_model_layers_9_self_attn_v_proj_bias, weight = model_model_layers_9_self_attn_v_proj_weight_quantized, x = hidden_states_277)[name = string("linear_65")]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 16, 128])]; tensor var_1903 = reshape(shape = concat_171x, x = linear_63)[name = string("op_1903")]; tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 2, 128])]; tensor var_1906 = reshape(shape = concat_172x, x = linear_64)[name = string("op_1906")]; tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 2, 128])]; tensor var_1909 = reshape(shape = concat_173x, x = linear_65)[name = string("op_1909")]; tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_19 = transpose(perm = q_19_perm_0, x = var_1903)[name = string("transpose_107")]; tensor var_1913 = mul(x = q_19, y = cos_7)[name = string("op_1913")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19)[name = string("x2_37")]; fp32 const_21_promoted = const()[name = string("const_21_promoted"), val = fp32(-0x1p+0)]; tensor var_1924 = mul(x = x2_37, y = const_21_promoted)[name = string("op_1924")]; bool var_1926_interleave_0 = const()[name = string("op_1926_interleave_0"), val = bool(false)]; tensor var_1926 = concat(axis = var_88, interleave = var_1926_interleave_0, values = (var_1924, x1_37))[name = string("op_1926")]; tensor var_1927 = mul(x = var_1926, y = sin_7)[name = string("op_1927")]; tensor query_states_39 = add(x = var_1913, y = var_1927)[name = string("query_states_39")]; tensor k_19 = transpose(perm = k_19_perm_0, x = var_1906)[name = string("transpose_106")]; tensor var_1929 = mul(x = k_19, y = cos_7)[name = string("op_1929")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19)[name = string("x2_39")]; fp32 const_22_promoted = const()[name = string("const_22_promoted"), val = fp32(-0x1p+0)]; tensor var_1940 = mul(x = x2_39, y = const_22_promoted)[name = string("op_1940")]; bool var_1942_interleave_0 = const()[name = string("op_1942_interleave_0"), val = bool(false)]; tensor var_1942 = concat(axis = var_88, interleave = var_1942_interleave_0, values = (var_1940, x1_39))[name = string("op_1942")]; tensor var_1943 = mul(x = var_1942, y = sin_7)[name = string("op_1943")]; tensor k_state_19 = add(x = var_1929, y = var_1943)[name = string("k_state_19")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor concat_176_values0_0 = const()[name = string("concat_176_values0_0"), val = tensor([9])]; int32 concat_176_axis_0 = const()[name = string("concat_176_axis_0"), val = int32(0)]; bool concat_176_interleave_0 = const()[name = string("concat_176_interleave_0"), val = bool(false)]; tensor concat_176 = concat(axis = concat_176_axis_0, interleave = concat_176_interleave_0, values = (concat_176_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_176")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19, x = key_cache_internal_tensor_assign_9_1)[name = string("key_cache_internal_tensor_assign_10")]; string cast_139_dtype_0 = const()[name = string("cast_139_dtype_0"), val = string("fp16")]; tensor cast_139 = cast(dtype = cast_139_dtype_0, x = key_cache_internal_tensor_assign_10)[name = string("cast_108")]; write_state(data = cast_139, input = key_cache)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; string key_cache_internal_tensor_assign_10_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_10_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_19 = transpose(perm = v_state_19_perm_0, x = var_1909)[name = string("transpose_105")]; tensor value_cache_internal_tensor_assign_10 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19, x = value_cache_internal_tensor_assign_9_1)[name = string("value_cache_internal_tensor_assign_10")]; string cast_140_dtype_0 = const()[name = string("cast_140_dtype_0"), val = string("fp16")]; tensor cast_140 = cast(dtype = cast_140_dtype_0, x = value_cache_internal_tensor_assign_10)[name = string("cast_107")]; write_state(data = cast_140, input = value_cache)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; string value_cache_internal_tensor_assign_10_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_10_dtype_0"), val = string("fp32")]; tensor var_1966_begin_0 = const()[name = string("op_1966_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_1966_end_0 = const()[name = string("op_1966_end_0"), val = tensor([10, 1, 2, 2048, 128])]; tensor var_1966_end_mask_0 = const()[name = string("op_1966_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1966_squeeze_mask_0 = const()[name = string("op_1966_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_1 = cast(dtype = key_cache_internal_tensor_assign_10_dtype_0, x = coreml_update_state_90)[name = string("cast_106")]; tensor var_1966 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, squeeze_mask = var_1966_squeeze_mask_0, x = key_cache_internal_tensor_assign_10_1)[name = string("op_1966")]; tensor var_1969_begin_0 = const()[name = string("op_1969_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1969_end_mask_0 = const()[name = string("op_1969_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1969 = slice_by_index(begin = var_1969_begin_0, end = concat_11, end_mask = var_1969_end_mask_0, x = var_1966)[name = string("op_1969")]; tensor var_1971_begin_0 = const()[name = string("op_1971_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_1971_end_0 = const()[name = string("op_1971_end_0"), val = tensor([10, 1, 2, 2048, 128])]; tensor var_1971_end_mask_0 = const()[name = string("op_1971_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1971_squeeze_mask_0 = const()[name = string("op_1971_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_10_1 = cast(dtype = value_cache_internal_tensor_assign_10_dtype_0, x = coreml_update_state_91)[name = string("cast_105")]; tensor var_1971 = slice_by_index(begin = var_1971_begin_0, end = var_1971_end_0, end_mask = var_1971_end_mask_0, squeeze_mask = var_1971_squeeze_mask_0, x = value_cache_internal_tensor_assign_10_1)[name = string("op_1971")]; tensor var_1974_begin_0 = const()[name = string("op_1974_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1974_end_mask_0 = const()[name = string("op_1974_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1974 = slice_by_index(begin = var_1974_begin_0, end = concat_11, end_mask = var_1974_end_mask_0, x = var_1971)[name = string("op_1974")]; tensor var_1976_shape = shape(x = var_1969)[name = string("op_1976_shape")]; int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; int32 gather_176 = const()[name = string("gather_176"), val = int32(2)]; int32 select_177 = const()[name = string("select_177"), val = int32(2)]; int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; int32 gather_177 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177, validate_indices = gather_177_validate_indices_0, x = var_1976_shape)[name = string("gather_177")]; int32 gather_178 = const()[name = string("gather_178"), val = int32(128)]; tensor var_1983_axes_0 = const()[name = string("op_1983_axes_0"), val = tensor([2])]; tensor var_1983 = expand_dims(axes = var_1983_axes_0, x = var_1969)[name = string("op_1983")]; tensor shape_197 = shape(x = var_1983)[name = string("shape_197")]; int32 concat_184_axis_0 = const()[name = string("concat_184_axis_0"), val = int32(0)]; bool concat_184_interleave_0 = const()[name = string("concat_184_interleave_0"), val = bool(false)]; tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_100, gather_177, gather_178))[name = string("concat_184")]; tensor real_div_18 = real_div(x = concat_184, y = shape_197)[name = string("real_div_18")]; tensor hidden_states_281 = tile(reps = real_div_18, x = var_1983)[name = string("hidden_states_281")]; tensor concat_185x = const()[name = string("concat_185x"), val = tensor([1, 16, -1, 128])]; tensor key_states_39 = reshape(shape = concat_185x, x = hidden_states_281)[name = string("key_states_39")]; tensor var_1993_shape = shape(x = var_1974)[name = string("op_1993_shape")]; int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; int32 gather_180 = const()[name = string("gather_180"), val = int32(2)]; int32 select_181 = const()[name = string("select_181"), val = int32(2)]; int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; int32 gather_181 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181, validate_indices = gather_181_validate_indices_0, x = var_1993_shape)[name = string("gather_181")]; int32 gather_182 = const()[name = string("gather_182"), val = int32(128)]; tensor var_2000_axes_0 = const()[name = string("op_2000_axes_0"), val = tensor([2])]; tensor var_2000 = expand_dims(axes = var_2000_axes_0, x = var_1974)[name = string("op_2000")]; tensor shape_202 = shape(x = var_2000)[name = string("shape_202")]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_100, gather_181, gather_182))[name = string("concat_186")]; tensor real_div_19 = real_div(x = concat_186, y = shape_202)[name = string("real_div_19")]; tensor hidden_states_285 = tile(reps = real_div_19, x = var_2000)[name = string("hidden_states_285")]; tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, 16, -1, 128])]; tensor value_states_39 = reshape(shape = concat_187x, x = hidden_states_285)[name = string("value_states_39")]; tensor var_2010_shape = shape(x = key_states_39)[name = string("op_2010_shape")]; int32 select_183 = const()[name = string("select_183"), val = int32(2)]; int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; int32 gather_183 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183, validate_indices = gather_183_validate_indices_0, x = var_2010_shape)[name = string("gather_183")]; int32 concat_188_values0_0 = const()[name = string("concat_188_values0_0"), val = int32(1)]; int32 concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = int32(1)]; int32 concat_188_values2_0 = const()[name = string("concat_188_values2_0"), val = int32(0)]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (concat_188_values0_0, concat_188_values1_0, concat_188_values2_0, gather_183))[name = string("concat_188")]; tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_21 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_188, end_mask = causal_mask_21_end_mask_0, x = cast_0)[name = string("causal_mask_21")]; tensor attn_output_37 = scaled_dot_product_attention(attn_mask = causal_mask_21, key = key_states_39, query = query_states_39, value = value_states_39)[name = string("attn_output_37")]; tensor var_2016_perm_0 = const()[name = string("op_2016_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 2048])]; tensor var_2016 = transpose(perm = var_2016_perm_0, x = attn_output_37)[name = string("transpose_104")]; tensor input_73 = reshape(shape = concat_189x, x = var_2016)[name = string("input_73")]; tensor linear_66 = linear(bias = linear_3_bias_0, weight = model_model_layers_9_self_attn_o_proj_weight_quantized, x = input_73)[name = string("linear_66")]; tensor hidden_states_289 = add(x = hidden_states_269, y = linear_66)[name = string("hidden_states_289")]; fp32 var_94_promoted_19 = const()[name = string("op_94_promoted_19"), val = fp32(0x1p+1)]; tensor var_2025 = pow(x = hidden_states_289, y = var_94_promoted_19)[name = string("op_2025")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2025)[name = string("variance_39")]; fp32 var_2028 = const()[name = string("op_2028"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2029 = add(x = variance_39, y = var_2028)[name = string("op_2029")]; fp32 var_2030_epsilon_0 = const()[name = string("op_2030_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2030 = rsqrt(epsilon = var_2030_epsilon_0, x = var_2029)[name = string("op_2030")]; tensor hidden_states_293 = mul(x = hidden_states_289, y = var_2030)[name = string("hidden_states_293")]; tensor input_75 = mul(x = model_model_layers_9_post_attention_layernorm_weight, y = hidden_states_293)[name = string("input_75")]; tensor linear_67 = linear(bias = linear_4_bias_0, weight = model_model_layers_9_mlp_gate_proj_weight_quantized, x = input_75)[name = string("linear_67")]; tensor var_2042 = silu(x = linear_67)[name = string("op_2042")]; tensor linear_68 = linear(bias = linear_4_bias_0, weight = model_model_layers_9_mlp_up_proj_weight_quantized, x = input_75)[name = string("linear_68")]; tensor input_79 = mul(x = var_2042, y = linear_68)[name = string("input_79")]; tensor linear_69 = linear(bias = linear_3_bias_0, weight = model_model_layers_9_mlp_down_proj_weight_quantized, x = input_79)[name = string("linear_69")]; tensor hidden_states_299 = add(x = hidden_states_289, y = linear_69)[name = string("hidden_states_299")]; fp32 var_94_promoted_20 = const()[name = string("op_94_promoted_20"), val = fp32(0x1p+1)]; tensor var_2055 = pow(x = hidden_states_299, y = var_94_promoted_20)[name = string("op_2055")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_2055)[name = string("variance_41")]; fp32 var_2058 = const()[name = string("op_2058"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2059 = add(x = variance_41, y = var_2058)[name = string("op_2059")]; fp32 var_2060_epsilon_0 = const()[name = string("op_2060_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2060 = rsqrt(epsilon = var_2060_epsilon_0, x = var_2059)[name = string("op_2060")]; tensor hidden_states_303 = mul(x = hidden_states_299, y = var_2060)[name = string("hidden_states_303")]; tensor hidden_states_307 = mul(x = model_model_layers_10_input_layernorm_weight, y = hidden_states_303)[name = string("hidden_states_307")]; tensor linear_70 = linear(bias = model_model_layers_10_self_attn_q_proj_bias, weight = model_model_layers_10_self_attn_q_proj_weight_quantized, x = hidden_states_307)[name = string("linear_70")]; tensor linear_71 = linear(bias = model_model_layers_10_self_attn_k_proj_bias, weight = model_model_layers_10_self_attn_k_proj_weight_quantized, x = hidden_states_307)[name = string("linear_71")]; tensor linear_72 = linear(bias = model_model_layers_10_self_attn_v_proj_bias, weight = model_model_layers_10_self_attn_v_proj_weight_quantized, x = hidden_states_307)[name = string("linear_72")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 16, 128])]; tensor var_2083 = reshape(shape = concat_190x, x = linear_70)[name = string("op_2083")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 2, 128])]; tensor var_2086 = reshape(shape = concat_191x, x = linear_71)[name = string("op_2086")]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 2, 128])]; tensor var_2089 = reshape(shape = concat_192x, x = linear_72)[name = string("op_2089")]; tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21 = transpose(perm = q_21_perm_0, x = var_2083)[name = string("transpose_103")]; tensor var_2093 = mul(x = q_21, y = cos_7)[name = string("op_2093")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21)[name = string("x2_41")]; fp32 const_23_promoted = const()[name = string("const_23_promoted"), val = fp32(-0x1p+0)]; tensor var_2104 = mul(x = x2_41, y = const_23_promoted)[name = string("op_2104")]; bool var_2106_interleave_0 = const()[name = string("op_2106_interleave_0"), val = bool(false)]; tensor var_2106 = concat(axis = var_88, interleave = var_2106_interleave_0, values = (var_2104, x1_41))[name = string("op_2106")]; tensor var_2107 = mul(x = var_2106, y = sin_7)[name = string("op_2107")]; tensor query_states_43 = add(x = var_2093, y = var_2107)[name = string("query_states_43")]; tensor k_21 = transpose(perm = k_21_perm_0, x = var_2086)[name = string("transpose_102")]; tensor var_2109 = mul(x = k_21, y = cos_7)[name = string("op_2109")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21)[name = string("x2_43")]; fp32 const_24_promoted = const()[name = string("const_24_promoted"), val = fp32(-0x1p+0)]; tensor var_2120 = mul(x = x2_43, y = const_24_promoted)[name = string("op_2120")]; bool var_2122_interleave_0 = const()[name = string("op_2122_interleave_0"), val = bool(false)]; tensor var_2122 = concat(axis = var_88, interleave = var_2122_interleave_0, values = (var_2120, x1_43))[name = string("op_2122")]; tensor var_2123 = mul(x = var_2122, y = sin_7)[name = string("op_2123")]; tensor k_state_21 = add(x = var_2109, y = var_2123)[name = string("k_state_21")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([10])]; int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_195")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21, x = key_cache_internal_tensor_assign_10_1)[name = string("key_cache_internal_tensor_assign_11")]; string cast_153_dtype_0 = const()[name = string("cast_153_dtype_0"), val = string("fp16")]; tensor cast_153 = cast(dtype = cast_153_dtype_0, x = key_cache_internal_tensor_assign_11)[name = string("cast_104")]; write_state(data = cast_153, input = key_cache)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; string key_cache_internal_tensor_assign_11_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_11_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_21 = transpose(perm = v_state_21_perm_0, x = var_2089)[name = string("transpose_101")]; tensor value_cache_internal_tensor_assign_11 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21, x = value_cache_internal_tensor_assign_10_1)[name = string("value_cache_internal_tensor_assign_11")]; string cast_154_dtype_0 = const()[name = string("cast_154_dtype_0"), val = string("fp16")]; tensor cast_154 = cast(dtype = cast_154_dtype_0, x = value_cache_internal_tensor_assign_11)[name = string("cast_103")]; write_state(data = cast_154, input = value_cache)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; string value_cache_internal_tensor_assign_11_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_11_dtype_0"), val = string("fp32")]; tensor var_2146_begin_0 = const()[name = string("op_2146_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2146_end_0 = const()[name = string("op_2146_end_0"), val = tensor([11, 1, 2, 2048, 128])]; tensor var_2146_end_mask_0 = const()[name = string("op_2146_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2146_squeeze_mask_0 = const()[name = string("op_2146_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_1 = cast(dtype = key_cache_internal_tensor_assign_11_dtype_0, x = coreml_update_state_92)[name = string("cast_102")]; tensor var_2146 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, squeeze_mask = var_2146_squeeze_mask_0, x = key_cache_internal_tensor_assign_11_1)[name = string("op_2146")]; tensor var_2149_begin_0 = const()[name = string("op_2149_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2149_end_mask_0 = const()[name = string("op_2149_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2149 = slice_by_index(begin = var_2149_begin_0, end = concat_11, end_mask = var_2149_end_mask_0, x = var_2146)[name = string("op_2149")]; tensor var_2151_begin_0 = const()[name = string("op_2151_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2151_end_0 = const()[name = string("op_2151_end_0"), val = tensor([11, 1, 2, 2048, 128])]; tensor var_2151_end_mask_0 = const()[name = string("op_2151_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2151_squeeze_mask_0 = const()[name = string("op_2151_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_11_1 = cast(dtype = value_cache_internal_tensor_assign_11_dtype_0, x = coreml_update_state_93)[name = string("cast_101")]; tensor var_2151 = slice_by_index(begin = var_2151_begin_0, end = var_2151_end_0, end_mask = var_2151_end_mask_0, squeeze_mask = var_2151_squeeze_mask_0, x = value_cache_internal_tensor_assign_11_1)[name = string("op_2151")]; tensor var_2154_begin_0 = const()[name = string("op_2154_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2154_end_mask_0 = const()[name = string("op_2154_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2154 = slice_by_index(begin = var_2154_begin_0, end = concat_11, end_mask = var_2154_end_mask_0, x = var_2151)[name = string("op_2154")]; tensor var_2156_shape = shape(x = var_2149)[name = string("op_2156_shape")]; int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; int32 gather_194 = const()[name = string("gather_194"), val = int32(2)]; int32 select_195 = const()[name = string("select_195"), val = int32(2)]; int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; int32 gather_195 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195, validate_indices = gather_195_validate_indices_0, x = var_2156_shape)[name = string("gather_195")]; int32 gather_196 = const()[name = string("gather_196"), val = int32(128)]; tensor var_2163_axes_0 = const()[name = string("op_2163_axes_0"), val = tensor([2])]; tensor var_2163 = expand_dims(axes = var_2163_axes_0, x = var_2149)[name = string("op_2163")]; tensor shape_217 = shape(x = var_2163)[name = string("shape_217")]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_100, gather_195, gather_196))[name = string("concat_203")]; tensor real_div_20 = real_div(x = concat_203, y = shape_217)[name = string("real_div_20")]; tensor hidden_states_311 = tile(reps = real_div_20, x = var_2163)[name = string("hidden_states_311")]; tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, 16, -1, 128])]; tensor key_states_43 = reshape(shape = concat_204x, x = hidden_states_311)[name = string("key_states_43")]; tensor var_2173_shape = shape(x = var_2154)[name = string("op_2173_shape")]; int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; int32 gather_198 = const()[name = string("gather_198"), val = int32(2)]; int32 select_199 = const()[name = string("select_199"), val = int32(2)]; int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; int32 gather_199 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199, validate_indices = gather_199_validate_indices_0, x = var_2173_shape)[name = string("gather_199")]; int32 gather_200 = const()[name = string("gather_200"), val = int32(128)]; tensor var_2180_axes_0 = const()[name = string("op_2180_axes_0"), val = tensor([2])]; tensor var_2180 = expand_dims(axes = var_2180_axes_0, x = var_2154)[name = string("op_2180")]; tensor shape_222 = shape(x = var_2180)[name = string("shape_222")]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_100, gather_199, gather_200))[name = string("concat_205")]; tensor real_div_21 = real_div(x = concat_205, y = shape_222)[name = string("real_div_21")]; tensor hidden_states_315 = tile(reps = real_div_21, x = var_2180)[name = string("hidden_states_315")]; tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 16, -1, 128])]; tensor value_states_43 = reshape(shape = concat_206x, x = hidden_states_315)[name = string("value_states_43")]; tensor var_2190_shape = shape(x = key_states_43)[name = string("op_2190_shape")]; int32 select_201 = const()[name = string("select_201"), val = int32(2)]; int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; int32 gather_201 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201, validate_indices = gather_201_validate_indices_0, x = var_2190_shape)[name = string("gather_201")]; int32 concat_207_values0_0 = const()[name = string("concat_207_values0_0"), val = int32(1)]; int32 concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = int32(1)]; int32 concat_207_values2_0 = const()[name = string("concat_207_values2_0"), val = int32(0)]; int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (concat_207_values0_0, concat_207_values1_0, concat_207_values2_0, gather_201))[name = string("concat_207")]; tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_23 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_207, end_mask = causal_mask_23_end_mask_0, x = cast_0)[name = string("causal_mask_23")]; tensor attn_output_41 = scaled_dot_product_attention(attn_mask = causal_mask_23, key = key_states_43, query = query_states_43, value = value_states_43)[name = string("attn_output_41")]; tensor var_2196_perm_0 = const()[name = string("op_2196_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_208x = const()[name = string("concat_208x"), val = tensor([1, -1, 2048])]; tensor var_2196 = transpose(perm = var_2196_perm_0, x = attn_output_41)[name = string("transpose_100")]; tensor input_81 = reshape(shape = concat_208x, x = var_2196)[name = string("input_81")]; tensor linear_73 = linear(bias = linear_3_bias_0, weight = model_model_layers_10_self_attn_o_proj_weight_quantized, x = input_81)[name = string("linear_73")]; tensor hidden_states_319 = add(x = hidden_states_299, y = linear_73)[name = string("hidden_states_319")]; fp32 var_94_promoted_21 = const()[name = string("op_94_promoted_21"), val = fp32(0x1p+1)]; tensor var_2205 = pow(x = hidden_states_319, y = var_94_promoted_21)[name = string("op_2205")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor variance_43 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2205)[name = string("variance_43")]; fp32 var_2208 = const()[name = string("op_2208"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2209 = add(x = variance_43, y = var_2208)[name = string("op_2209")]; fp32 var_2210_epsilon_0 = const()[name = string("op_2210_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2210 = rsqrt(epsilon = var_2210_epsilon_0, x = var_2209)[name = string("op_2210")]; tensor hidden_states_323 = mul(x = hidden_states_319, y = var_2210)[name = string("hidden_states_323")]; tensor input_83 = mul(x = model_model_layers_10_post_attention_layernorm_weight, y = hidden_states_323)[name = string("input_83")]; tensor linear_74 = linear(bias = linear_4_bias_0, weight = model_model_layers_10_mlp_gate_proj_weight_quantized, x = input_83)[name = string("linear_74")]; tensor var_2222 = silu(x = linear_74)[name = string("op_2222")]; tensor linear_75 = linear(bias = linear_4_bias_0, weight = model_model_layers_10_mlp_up_proj_weight_quantized, x = input_83)[name = string("linear_75")]; tensor input_87 = mul(x = var_2222, y = linear_75)[name = string("input_87")]; tensor linear_76 = linear(bias = linear_3_bias_0, weight = model_model_layers_10_mlp_down_proj_weight_quantized, x = input_87)[name = string("linear_76")]; tensor hidden_states_329 = add(x = hidden_states_319, y = linear_76)[name = string("hidden_states_329")]; fp32 var_94_promoted_22 = const()[name = string("op_94_promoted_22"), val = fp32(0x1p+1)]; tensor var_2235 = pow(x = hidden_states_329, y = var_94_promoted_22)[name = string("op_2235")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor variance_45 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2235)[name = string("variance_45")]; fp32 var_2238 = const()[name = string("op_2238"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2239 = add(x = variance_45, y = var_2238)[name = string("op_2239")]; fp32 var_2240_epsilon_0 = const()[name = string("op_2240_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2240 = rsqrt(epsilon = var_2240_epsilon_0, x = var_2239)[name = string("op_2240")]; tensor hidden_states_333 = mul(x = hidden_states_329, y = var_2240)[name = string("hidden_states_333")]; tensor hidden_states_337 = mul(x = model_model_layers_11_input_layernorm_weight, y = hidden_states_333)[name = string("hidden_states_337")]; tensor linear_77 = linear(bias = model_model_layers_11_self_attn_q_proj_bias, weight = model_model_layers_11_self_attn_q_proj_weight_quantized, x = hidden_states_337)[name = string("linear_77")]; tensor linear_78 = linear(bias = model_model_layers_11_self_attn_k_proj_bias, weight = model_model_layers_11_self_attn_k_proj_weight_quantized, x = hidden_states_337)[name = string("linear_78")]; tensor linear_79 = linear(bias = model_model_layers_11_self_attn_v_proj_bias, weight = model_model_layers_11_self_attn_v_proj_weight_quantized, x = hidden_states_337)[name = string("linear_79")]; tensor concat_209x = const()[name = string("concat_209x"), val = tensor([1, -1, 16, 128])]; tensor var_2263 = reshape(shape = concat_209x, x = linear_77)[name = string("op_2263")]; tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 2, 128])]; tensor var_2266 = reshape(shape = concat_210x, x = linear_78)[name = string("op_2266")]; tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 2, 128])]; tensor var_2269 = reshape(shape = concat_211x, x = linear_79)[name = string("op_2269")]; tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_23 = transpose(perm = q_23_perm_0, x = var_2263)[name = string("transpose_99")]; tensor var_2273 = mul(x = q_23, y = cos_7)[name = string("op_2273")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23)[name = string("x2_45")]; fp32 const_25_promoted = const()[name = string("const_25_promoted"), val = fp32(-0x1p+0)]; tensor var_2284 = mul(x = x2_45, y = const_25_promoted)[name = string("op_2284")]; bool var_2286_interleave_0 = const()[name = string("op_2286_interleave_0"), val = bool(false)]; tensor var_2286 = concat(axis = var_88, interleave = var_2286_interleave_0, values = (var_2284, x1_45))[name = string("op_2286")]; tensor var_2287 = mul(x = var_2286, y = sin_7)[name = string("op_2287")]; tensor query_states_47 = add(x = var_2273, y = var_2287)[name = string("query_states_47")]; tensor k_23 = transpose(perm = k_23_perm_0, x = var_2266)[name = string("transpose_98")]; tensor var_2289 = mul(x = k_23, y = cos_7)[name = string("op_2289")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23)[name = string("x2_47")]; fp32 const_26_promoted = const()[name = string("const_26_promoted"), val = fp32(-0x1p+0)]; tensor var_2300 = mul(x = x2_47, y = const_26_promoted)[name = string("op_2300")]; bool var_2302_interleave_0 = const()[name = string("op_2302_interleave_0"), val = bool(false)]; tensor var_2302 = concat(axis = var_88, interleave = var_2302_interleave_0, values = (var_2300, x1_47))[name = string("op_2302")]; tensor var_2303 = mul(x = var_2302, y = sin_7)[name = string("op_2303")]; tensor k_state_23 = add(x = var_2289, y = var_2303)[name = string("k_state_23")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = tensor([11])]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_214")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23, x = key_cache_internal_tensor_assign_11_1)[name = string("key_cache_internal_tensor_assign_12")]; string cast_167_dtype_0 = const()[name = string("cast_167_dtype_0"), val = string("fp16")]; tensor cast_167 = cast(dtype = cast_167_dtype_0, x = key_cache_internal_tensor_assign_12)[name = string("cast_100")]; write_state(data = cast_167, input = key_cache)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; string key_cache_internal_tensor_assign_12_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_12_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_23 = transpose(perm = v_state_23_perm_0, x = var_2269)[name = string("transpose_97")]; tensor value_cache_internal_tensor_assign_12 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23, x = value_cache_internal_tensor_assign_11_1)[name = string("value_cache_internal_tensor_assign_12")]; string cast_168_dtype_0 = const()[name = string("cast_168_dtype_0"), val = string("fp16")]; tensor cast_168 = cast(dtype = cast_168_dtype_0, x = value_cache_internal_tensor_assign_12)[name = string("cast_99")]; write_state(data = cast_168, input = value_cache)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; string value_cache_internal_tensor_assign_12_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_12_dtype_0"), val = string("fp32")]; tensor var_2326_begin_0 = const()[name = string("op_2326_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2326_end_0 = const()[name = string("op_2326_end_0"), val = tensor([12, 1, 2, 2048, 128])]; tensor var_2326_end_mask_0 = const()[name = string("op_2326_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2326_squeeze_mask_0 = const()[name = string("op_2326_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_1 = cast(dtype = key_cache_internal_tensor_assign_12_dtype_0, x = coreml_update_state_94)[name = string("cast_98")]; tensor var_2326 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, squeeze_mask = var_2326_squeeze_mask_0, x = key_cache_internal_tensor_assign_12_1)[name = string("op_2326")]; tensor var_2329_begin_0 = const()[name = string("op_2329_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2329_end_mask_0 = const()[name = string("op_2329_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2329 = slice_by_index(begin = var_2329_begin_0, end = concat_11, end_mask = var_2329_end_mask_0, x = var_2326)[name = string("op_2329")]; tensor var_2331_begin_0 = const()[name = string("op_2331_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2331_end_0 = const()[name = string("op_2331_end_0"), val = tensor([12, 1, 2, 2048, 128])]; tensor var_2331_end_mask_0 = const()[name = string("op_2331_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2331_squeeze_mask_0 = const()[name = string("op_2331_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_12_1 = cast(dtype = value_cache_internal_tensor_assign_12_dtype_0, x = coreml_update_state_95)[name = string("cast_97")]; tensor var_2331 = slice_by_index(begin = var_2331_begin_0, end = var_2331_end_0, end_mask = var_2331_end_mask_0, squeeze_mask = var_2331_squeeze_mask_0, x = value_cache_internal_tensor_assign_12_1)[name = string("op_2331")]; tensor var_2334_begin_0 = const()[name = string("op_2334_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2334_end_mask_0 = const()[name = string("op_2334_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2334 = slice_by_index(begin = var_2334_begin_0, end = concat_11, end_mask = var_2334_end_mask_0, x = var_2331)[name = string("op_2334")]; tensor var_2336_shape = shape(x = var_2329)[name = string("op_2336_shape")]; int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; int32 gather_212 = const()[name = string("gather_212"), val = int32(2)]; int32 select_213 = const()[name = string("select_213"), val = int32(2)]; int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; int32 gather_213 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213, validate_indices = gather_213_validate_indices_0, x = var_2336_shape)[name = string("gather_213")]; int32 gather_214 = const()[name = string("gather_214"), val = int32(128)]; tensor var_2343_axes_0 = const()[name = string("op_2343_axes_0"), val = tensor([2])]; tensor var_2343 = expand_dims(axes = var_2343_axes_0, x = var_2329)[name = string("op_2343")]; tensor shape_237 = shape(x = var_2343)[name = string("shape_237")]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_100, gather_213, gather_214))[name = string("concat_222")]; tensor real_div_22 = real_div(x = concat_222, y = shape_237)[name = string("real_div_22")]; tensor hidden_states_341 = tile(reps = real_div_22, x = var_2343)[name = string("hidden_states_341")]; tensor concat_223x = const()[name = string("concat_223x"), val = tensor([1, 16, -1, 128])]; tensor key_states_47 = reshape(shape = concat_223x, x = hidden_states_341)[name = string("key_states_47")]; tensor var_2353_shape = shape(x = var_2334)[name = string("op_2353_shape")]; int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; int32 gather_216 = const()[name = string("gather_216"), val = int32(2)]; int32 select_217 = const()[name = string("select_217"), val = int32(2)]; int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; int32 gather_217 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217, validate_indices = gather_217_validate_indices_0, x = var_2353_shape)[name = string("gather_217")]; int32 gather_218 = const()[name = string("gather_218"), val = int32(128)]; tensor var_2360_axes_0 = const()[name = string("op_2360_axes_0"), val = tensor([2])]; tensor var_2360 = expand_dims(axes = var_2360_axes_0, x = var_2334)[name = string("op_2360")]; tensor shape_242 = shape(x = var_2360)[name = string("shape_242")]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_100, gather_217, gather_218))[name = string("concat_224")]; tensor real_div_23 = real_div(x = concat_224, y = shape_242)[name = string("real_div_23")]; tensor hidden_states_345 = tile(reps = real_div_23, x = var_2360)[name = string("hidden_states_345")]; tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, 16, -1, 128])]; tensor value_states_47 = reshape(shape = concat_225x, x = hidden_states_345)[name = string("value_states_47")]; tensor var_2370_shape = shape(x = key_states_47)[name = string("op_2370_shape")]; int32 select_219 = const()[name = string("select_219"), val = int32(2)]; int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; int32 gather_219 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219, validate_indices = gather_219_validate_indices_0, x = var_2370_shape)[name = string("gather_219")]; int32 concat_226_values0_0 = const()[name = string("concat_226_values0_0"), val = int32(1)]; int32 concat_226_values1_0 = const()[name = string("concat_226_values1_0"), val = int32(1)]; int32 concat_226_values2_0 = const()[name = string("concat_226_values2_0"), val = int32(0)]; int32 concat_226_axis_0 = const()[name = string("concat_226_axis_0"), val = int32(0)]; bool concat_226_interleave_0 = const()[name = string("concat_226_interleave_0"), val = bool(false)]; tensor concat_226 = concat(axis = concat_226_axis_0, interleave = concat_226_interleave_0, values = (concat_226_values0_0, concat_226_values1_0, concat_226_values2_0, gather_219))[name = string("concat_226")]; tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_25 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_226, end_mask = causal_mask_25_end_mask_0, x = cast_0)[name = string("causal_mask_25")]; tensor attn_output_45 = scaled_dot_product_attention(attn_mask = causal_mask_25, key = key_states_47, query = query_states_47, value = value_states_47)[name = string("attn_output_45")]; tensor var_2376_perm_0 = const()[name = string("op_2376_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_227x = const()[name = string("concat_227x"), val = tensor([1, -1, 2048])]; tensor var_2376 = transpose(perm = var_2376_perm_0, x = attn_output_45)[name = string("transpose_96")]; tensor input_89 = reshape(shape = concat_227x, x = var_2376)[name = string("input_89")]; tensor linear_80 = linear(bias = linear_3_bias_0, weight = model_model_layers_11_self_attn_o_proj_weight_quantized, x = input_89)[name = string("linear_80")]; tensor hidden_states_349 = add(x = hidden_states_329, y = linear_80)[name = string("hidden_states_349")]; fp32 var_94_promoted_23 = const()[name = string("op_94_promoted_23"), val = fp32(0x1p+1)]; tensor var_2385 = pow(x = hidden_states_349, y = var_94_promoted_23)[name = string("op_2385")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2385)[name = string("variance_47")]; fp32 var_2388 = const()[name = string("op_2388"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2389 = add(x = variance_47, y = var_2388)[name = string("op_2389")]; fp32 var_2390_epsilon_0 = const()[name = string("op_2390_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2390 = rsqrt(epsilon = var_2390_epsilon_0, x = var_2389)[name = string("op_2390")]; tensor hidden_states_353 = mul(x = hidden_states_349, y = var_2390)[name = string("hidden_states_353")]; tensor input_91 = mul(x = model_model_layers_11_post_attention_layernorm_weight, y = hidden_states_353)[name = string("input_91")]; tensor linear_81 = linear(bias = linear_4_bias_0, weight = model_model_layers_11_mlp_gate_proj_weight_quantized, x = input_91)[name = string("linear_81")]; tensor var_2402 = silu(x = linear_81)[name = string("op_2402")]; tensor linear_82 = linear(bias = linear_4_bias_0, weight = model_model_layers_11_mlp_up_proj_weight_quantized, x = input_91)[name = string("linear_82")]; tensor input_95 = mul(x = var_2402, y = linear_82)[name = string("input_95")]; tensor linear_83 = linear(bias = linear_3_bias_0, weight = model_model_layers_11_mlp_down_proj_weight_quantized, x = input_95)[name = string("linear_83")]; tensor hidden_states_359 = add(x = hidden_states_349, y = linear_83)[name = string("hidden_states_359")]; fp32 var_94_promoted_24 = const()[name = string("op_94_promoted_24"), val = fp32(0x1p+1)]; tensor var_2415 = pow(x = hidden_states_359, y = var_94_promoted_24)[name = string("op_2415")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2415)[name = string("variance_49")]; fp32 var_2418 = const()[name = string("op_2418"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2419 = add(x = variance_49, y = var_2418)[name = string("op_2419")]; fp32 var_2420_epsilon_0 = const()[name = string("op_2420_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2420 = rsqrt(epsilon = var_2420_epsilon_0, x = var_2419)[name = string("op_2420")]; tensor hidden_states_363 = mul(x = hidden_states_359, y = var_2420)[name = string("hidden_states_363")]; tensor hidden_states_367 = mul(x = model_model_layers_12_input_layernorm_weight, y = hidden_states_363)[name = string("hidden_states_367")]; tensor linear_84 = linear(bias = model_model_layers_12_self_attn_q_proj_bias, weight = model_model_layers_12_self_attn_q_proj_weight_quantized, x = hidden_states_367)[name = string("linear_84")]; tensor linear_85 = linear(bias = model_model_layers_12_self_attn_k_proj_bias, weight = model_model_layers_12_self_attn_k_proj_weight_quantized, x = hidden_states_367)[name = string("linear_85")]; tensor linear_86 = linear(bias = model_model_layers_12_self_attn_v_proj_bias, weight = model_model_layers_12_self_attn_v_proj_weight_quantized, x = hidden_states_367)[name = string("linear_86")]; tensor concat_228x = const()[name = string("concat_228x"), val = tensor([1, -1, 16, 128])]; tensor var_2443 = reshape(shape = concat_228x, x = linear_84)[name = string("op_2443")]; tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 2, 128])]; tensor var_2446 = reshape(shape = concat_229x, x = linear_85)[name = string("op_2446")]; tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 2, 128])]; tensor var_2449 = reshape(shape = concat_230x, x = linear_86)[name = string("op_2449")]; tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_25 = transpose(perm = q_25_perm_0, x = var_2443)[name = string("transpose_95")]; tensor var_2453 = mul(x = q_25, y = cos_7)[name = string("op_2453")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25)[name = string("x2_49")]; fp32 const_27_promoted = const()[name = string("const_27_promoted"), val = fp32(-0x1p+0)]; tensor var_2464 = mul(x = x2_49, y = const_27_promoted)[name = string("op_2464")]; bool var_2466_interleave_0 = const()[name = string("op_2466_interleave_0"), val = bool(false)]; tensor var_2466 = concat(axis = var_88, interleave = var_2466_interleave_0, values = (var_2464, x1_49))[name = string("op_2466")]; tensor var_2467 = mul(x = var_2466, y = sin_7)[name = string("op_2467")]; tensor query_states_51 = add(x = var_2453, y = var_2467)[name = string("query_states_51")]; tensor k_25 = transpose(perm = k_25_perm_0, x = var_2446)[name = string("transpose_94")]; tensor var_2469 = mul(x = k_25, y = cos_7)[name = string("op_2469")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25)[name = string("x2_51")]; fp32 const_28_promoted = const()[name = string("const_28_promoted"), val = fp32(-0x1p+0)]; tensor var_2480 = mul(x = x2_51, y = const_28_promoted)[name = string("op_2480")]; bool var_2482_interleave_0 = const()[name = string("op_2482_interleave_0"), val = bool(false)]; tensor var_2482 = concat(axis = var_88, interleave = var_2482_interleave_0, values = (var_2480, x1_51))[name = string("op_2482")]; tensor var_2483 = mul(x = var_2482, y = sin_7)[name = string("op_2483")]; tensor k_state_25 = add(x = var_2469, y = var_2483)[name = string("k_state_25")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor concat_233_values0_0 = const()[name = string("concat_233_values0_0"), val = tensor([12])]; int32 concat_233_axis_0 = const()[name = string("concat_233_axis_0"), val = int32(0)]; bool concat_233_interleave_0 = const()[name = string("concat_233_interleave_0"), val = bool(false)]; tensor concat_233 = concat(axis = concat_233_axis_0, interleave = concat_233_interleave_0, values = (concat_233_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_233")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25, x = key_cache_internal_tensor_assign_12_1)[name = string("key_cache_internal_tensor_assign_13")]; string cast_181_dtype_0 = const()[name = string("cast_181_dtype_0"), val = string("fp16")]; tensor cast_181 = cast(dtype = cast_181_dtype_0, x = key_cache_internal_tensor_assign_13)[name = string("cast_96")]; write_state(data = cast_181, input = key_cache)[name = string("coreml_update_state_96_write_state")]; tensor coreml_update_state_96 = read_state(input = key_cache)[name = string("coreml_update_state_96")]; string key_cache_internal_tensor_assign_13_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_13_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_25 = transpose(perm = v_state_25_perm_0, x = var_2449)[name = string("transpose_93")]; tensor value_cache_internal_tensor_assign_13 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25, x = value_cache_internal_tensor_assign_12_1)[name = string("value_cache_internal_tensor_assign_13")]; string cast_182_dtype_0 = const()[name = string("cast_182_dtype_0"), val = string("fp16")]; tensor cast_182 = cast(dtype = cast_182_dtype_0, x = value_cache_internal_tensor_assign_13)[name = string("cast_95")]; write_state(data = cast_182, input = value_cache)[name = string("coreml_update_state_97_write_state")]; tensor coreml_update_state_97 = read_state(input = value_cache)[name = string("coreml_update_state_97")]; string value_cache_internal_tensor_assign_13_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_13_dtype_0"), val = string("fp32")]; tensor var_2506_begin_0 = const()[name = string("op_2506_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2506_end_0 = const()[name = string("op_2506_end_0"), val = tensor([13, 1, 2, 2048, 128])]; tensor var_2506_end_mask_0 = const()[name = string("op_2506_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2506_squeeze_mask_0 = const()[name = string("op_2506_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_1 = cast(dtype = key_cache_internal_tensor_assign_13_dtype_0, x = coreml_update_state_96)[name = string("cast_94")]; tensor var_2506 = slice_by_index(begin = var_2506_begin_0, end = var_2506_end_0, end_mask = var_2506_end_mask_0, squeeze_mask = var_2506_squeeze_mask_0, x = key_cache_internal_tensor_assign_13_1)[name = string("op_2506")]; tensor var_2509_begin_0 = const()[name = string("op_2509_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2509_end_mask_0 = const()[name = string("op_2509_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2509 = slice_by_index(begin = var_2509_begin_0, end = concat_11, end_mask = var_2509_end_mask_0, x = var_2506)[name = string("op_2509")]; tensor var_2511_begin_0 = const()[name = string("op_2511_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2511_end_0 = const()[name = string("op_2511_end_0"), val = tensor([13, 1, 2, 2048, 128])]; tensor var_2511_end_mask_0 = const()[name = string("op_2511_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2511_squeeze_mask_0 = const()[name = string("op_2511_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_13_1 = cast(dtype = value_cache_internal_tensor_assign_13_dtype_0, x = coreml_update_state_97)[name = string("cast_93")]; tensor var_2511 = slice_by_index(begin = var_2511_begin_0, end = var_2511_end_0, end_mask = var_2511_end_mask_0, squeeze_mask = var_2511_squeeze_mask_0, x = value_cache_internal_tensor_assign_13_1)[name = string("op_2511")]; tensor var_2514_begin_0 = const()[name = string("op_2514_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2514_end_mask_0 = const()[name = string("op_2514_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2514 = slice_by_index(begin = var_2514_begin_0, end = concat_11, end_mask = var_2514_end_mask_0, x = var_2511)[name = string("op_2514")]; tensor var_2516_shape = shape(x = var_2509)[name = string("op_2516_shape")]; int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; int32 gather_230 = const()[name = string("gather_230"), val = int32(2)]; int32 select_231 = const()[name = string("select_231"), val = int32(2)]; int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; int32 gather_231 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231, validate_indices = gather_231_validate_indices_0, x = var_2516_shape)[name = string("gather_231")]; int32 gather_232 = const()[name = string("gather_232"), val = int32(128)]; tensor var_2523_axes_0 = const()[name = string("op_2523_axes_0"), val = tensor([2])]; tensor var_2523 = expand_dims(axes = var_2523_axes_0, x = var_2509)[name = string("op_2523")]; tensor shape_257 = shape(x = var_2523)[name = string("shape_257")]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_100, gather_231, gather_232))[name = string("concat_241")]; tensor real_div_24 = real_div(x = concat_241, y = shape_257)[name = string("real_div_24")]; tensor hidden_states_371 = tile(reps = real_div_24, x = var_2523)[name = string("hidden_states_371")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, 16, -1, 128])]; tensor key_states_51 = reshape(shape = concat_242x, x = hidden_states_371)[name = string("key_states_51")]; tensor var_2533_shape = shape(x = var_2514)[name = string("op_2533_shape")]; int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; int32 gather_234 = const()[name = string("gather_234"), val = int32(2)]; int32 select_235 = const()[name = string("select_235"), val = int32(2)]; int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; int32 gather_235 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235, validate_indices = gather_235_validate_indices_0, x = var_2533_shape)[name = string("gather_235")]; int32 gather_236 = const()[name = string("gather_236"), val = int32(128)]; tensor var_2540_axes_0 = const()[name = string("op_2540_axes_0"), val = tensor([2])]; tensor var_2540 = expand_dims(axes = var_2540_axes_0, x = var_2514)[name = string("op_2540")]; tensor shape_262 = shape(x = var_2540)[name = string("shape_262")]; int32 concat_243_axis_0 = const()[name = string("concat_243_axis_0"), val = int32(0)]; bool concat_243_interleave_0 = const()[name = string("concat_243_interleave_0"), val = bool(false)]; tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_100, gather_235, gather_236))[name = string("concat_243")]; tensor real_div_25 = real_div(x = concat_243, y = shape_262)[name = string("real_div_25")]; tensor hidden_states_375 = tile(reps = real_div_25, x = var_2540)[name = string("hidden_states_375")]; tensor concat_244x = const()[name = string("concat_244x"), val = tensor([1, 16, -1, 128])]; tensor value_states_51 = reshape(shape = concat_244x, x = hidden_states_375)[name = string("value_states_51")]; tensor var_2550_shape = shape(x = key_states_51)[name = string("op_2550_shape")]; int32 select_237 = const()[name = string("select_237"), val = int32(2)]; int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; int32 gather_237 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237, validate_indices = gather_237_validate_indices_0, x = var_2550_shape)[name = string("gather_237")]; int32 concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = int32(1)]; int32 concat_245_values1_0 = const()[name = string("concat_245_values1_0"), val = int32(1)]; int32 concat_245_values2_0 = const()[name = string("concat_245_values2_0"), val = int32(0)]; int32 concat_245_axis_0 = const()[name = string("concat_245_axis_0"), val = int32(0)]; bool concat_245_interleave_0 = const()[name = string("concat_245_interleave_0"), val = bool(false)]; tensor concat_245 = concat(axis = concat_245_axis_0, interleave = concat_245_interleave_0, values = (concat_245_values0_0, concat_245_values1_0, concat_245_values2_0, gather_237))[name = string("concat_245")]; tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_27 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_245, end_mask = causal_mask_27_end_mask_0, x = cast_0)[name = string("causal_mask_27")]; tensor attn_output_49 = scaled_dot_product_attention(attn_mask = causal_mask_27, key = key_states_51, query = query_states_51, value = value_states_51)[name = string("attn_output_49")]; tensor var_2556_perm_0 = const()[name = string("op_2556_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_246x = const()[name = string("concat_246x"), val = tensor([1, -1, 2048])]; tensor var_2556 = transpose(perm = var_2556_perm_0, x = attn_output_49)[name = string("transpose_92")]; tensor input_97 = reshape(shape = concat_246x, x = var_2556)[name = string("input_97")]; tensor linear_87 = linear(bias = linear_3_bias_0, weight = model_model_layers_12_self_attn_o_proj_weight_quantized, x = input_97)[name = string("linear_87")]; tensor hidden_states_379 = add(x = hidden_states_359, y = linear_87)[name = string("hidden_states_379")]; fp32 var_94_promoted_25 = const()[name = string("op_94_promoted_25"), val = fp32(0x1p+1)]; tensor var_2565 = pow(x = hidden_states_379, y = var_94_promoted_25)[name = string("op_2565")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor variance_51 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2565)[name = string("variance_51")]; fp32 var_2568 = const()[name = string("op_2568"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2569 = add(x = variance_51, y = var_2568)[name = string("op_2569")]; fp32 var_2570_epsilon_0 = const()[name = string("op_2570_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2570 = rsqrt(epsilon = var_2570_epsilon_0, x = var_2569)[name = string("op_2570")]; tensor hidden_states_383 = mul(x = hidden_states_379, y = var_2570)[name = string("hidden_states_383")]; tensor input_99 = mul(x = model_model_layers_12_post_attention_layernorm_weight, y = hidden_states_383)[name = string("input_99")]; tensor linear_88 = linear(bias = linear_4_bias_0, weight = model_model_layers_12_mlp_gate_proj_weight_quantized, x = input_99)[name = string("linear_88")]; tensor var_2582 = silu(x = linear_88)[name = string("op_2582")]; tensor linear_89 = linear(bias = linear_4_bias_0, weight = model_model_layers_12_mlp_up_proj_weight_quantized, x = input_99)[name = string("linear_89")]; tensor input_103 = mul(x = var_2582, y = linear_89)[name = string("input_103")]; tensor linear_90 = linear(bias = linear_3_bias_0, weight = model_model_layers_12_mlp_down_proj_weight_quantized, x = input_103)[name = string("linear_90")]; tensor hidden_states_389 = add(x = hidden_states_379, y = linear_90)[name = string("hidden_states_389")]; fp32 var_94_promoted_26 = const()[name = string("op_94_promoted_26"), val = fp32(0x1p+1)]; tensor var_2595 = pow(x = hidden_states_389, y = var_94_promoted_26)[name = string("op_2595")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor variance_53 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2595)[name = string("variance_53")]; fp32 var_2598 = const()[name = string("op_2598"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2599 = add(x = variance_53, y = var_2598)[name = string("op_2599")]; fp32 var_2600_epsilon_0 = const()[name = string("op_2600_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2600 = rsqrt(epsilon = var_2600_epsilon_0, x = var_2599)[name = string("op_2600")]; tensor hidden_states_393 = mul(x = hidden_states_389, y = var_2600)[name = string("hidden_states_393")]; tensor hidden_states_397 = mul(x = model_model_layers_13_input_layernorm_weight, y = hidden_states_393)[name = string("hidden_states_397")]; tensor linear_91 = linear(bias = model_model_layers_13_self_attn_q_proj_bias, weight = model_model_layers_13_self_attn_q_proj_weight_quantized, x = hidden_states_397)[name = string("linear_91")]; tensor linear_92 = linear(bias = model_model_layers_13_self_attn_k_proj_bias, weight = model_model_layers_13_self_attn_k_proj_weight_quantized, x = hidden_states_397)[name = string("linear_92")]; tensor linear_93 = linear(bias = model_model_layers_13_self_attn_v_proj_bias, weight = model_model_layers_13_self_attn_v_proj_weight_quantized, x = hidden_states_397)[name = string("linear_93")]; tensor concat_247x = const()[name = string("concat_247x"), val = tensor([1, -1, 16, 128])]; tensor var_2623 = reshape(shape = concat_247x, x = linear_91)[name = string("op_2623")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 2, 128])]; tensor var_2626 = reshape(shape = concat_248x, x = linear_92)[name = string("op_2626")]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 2, 128])]; tensor var_2629 = reshape(shape = concat_249x, x = linear_93)[name = string("op_2629")]; tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_27 = transpose(perm = q_27_perm_0, x = var_2623)[name = string("transpose_91")]; tensor var_2633 = mul(x = q_27, y = cos_7)[name = string("op_2633")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27)[name = string("x2_53")]; fp32 const_29_promoted = const()[name = string("const_29_promoted"), val = fp32(-0x1p+0)]; tensor var_2644 = mul(x = x2_53, y = const_29_promoted)[name = string("op_2644")]; bool var_2646_interleave_0 = const()[name = string("op_2646_interleave_0"), val = bool(false)]; tensor var_2646 = concat(axis = var_88, interleave = var_2646_interleave_0, values = (var_2644, x1_53))[name = string("op_2646")]; tensor var_2647 = mul(x = var_2646, y = sin_7)[name = string("op_2647")]; tensor query_states_55 = add(x = var_2633, y = var_2647)[name = string("query_states_55")]; tensor k_27 = transpose(perm = k_27_perm_0, x = var_2626)[name = string("transpose_90")]; tensor var_2649 = mul(x = k_27, y = cos_7)[name = string("op_2649")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27)[name = string("x2_55")]; fp32 const_30_promoted = const()[name = string("const_30_promoted"), val = fp32(-0x1p+0)]; tensor var_2660 = mul(x = x2_55, y = const_30_promoted)[name = string("op_2660")]; bool var_2662_interleave_0 = const()[name = string("op_2662_interleave_0"), val = bool(false)]; tensor var_2662 = concat(axis = var_88, interleave = var_2662_interleave_0, values = (var_2660, x1_55))[name = string("op_2662")]; tensor var_2663 = mul(x = var_2662, y = sin_7)[name = string("op_2663")]; tensor k_state_27 = add(x = var_2649, y = var_2663)[name = string("k_state_27")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = tensor([13])]; int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_252")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27, x = key_cache_internal_tensor_assign_13_1)[name = string("key_cache_internal_tensor_assign_14")]; string cast_195_dtype_0 = const()[name = string("cast_195_dtype_0"), val = string("fp16")]; tensor cast_195 = cast(dtype = cast_195_dtype_0, x = key_cache_internal_tensor_assign_14)[name = string("cast_92")]; write_state(data = cast_195, input = key_cache)[name = string("coreml_update_state_98_write_state")]; tensor coreml_update_state_98 = read_state(input = key_cache)[name = string("coreml_update_state_98")]; string key_cache_internal_tensor_assign_14_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_14_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_27 = transpose(perm = v_state_27_perm_0, x = var_2629)[name = string("transpose_89")]; tensor value_cache_internal_tensor_assign_14 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27, x = value_cache_internal_tensor_assign_13_1)[name = string("value_cache_internal_tensor_assign_14")]; string cast_196_dtype_0 = const()[name = string("cast_196_dtype_0"), val = string("fp16")]; tensor cast_196 = cast(dtype = cast_196_dtype_0, x = value_cache_internal_tensor_assign_14)[name = string("cast_91")]; write_state(data = cast_196, input = value_cache)[name = string("coreml_update_state_99_write_state")]; tensor coreml_update_state_99 = read_state(input = value_cache)[name = string("coreml_update_state_99")]; string value_cache_internal_tensor_assign_14_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_14_dtype_0"), val = string("fp32")]; tensor var_2686_begin_0 = const()[name = string("op_2686_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2686_end_0 = const()[name = string("op_2686_end_0"), val = tensor([14, 1, 2, 2048, 128])]; tensor var_2686_end_mask_0 = const()[name = string("op_2686_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2686_squeeze_mask_0 = const()[name = string("op_2686_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_1 = cast(dtype = key_cache_internal_tensor_assign_14_dtype_0, x = coreml_update_state_98)[name = string("cast_90")]; tensor var_2686 = slice_by_index(begin = var_2686_begin_0, end = var_2686_end_0, end_mask = var_2686_end_mask_0, squeeze_mask = var_2686_squeeze_mask_0, x = key_cache_internal_tensor_assign_14_1)[name = string("op_2686")]; tensor var_2689_begin_0 = const()[name = string("op_2689_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2689_end_mask_0 = const()[name = string("op_2689_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2689 = slice_by_index(begin = var_2689_begin_0, end = concat_11, end_mask = var_2689_end_mask_0, x = var_2686)[name = string("op_2689")]; tensor var_2691_begin_0 = const()[name = string("op_2691_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2691_end_0 = const()[name = string("op_2691_end_0"), val = tensor([14, 1, 2, 2048, 128])]; tensor var_2691_end_mask_0 = const()[name = string("op_2691_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2691_squeeze_mask_0 = const()[name = string("op_2691_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_14_1 = cast(dtype = value_cache_internal_tensor_assign_14_dtype_0, x = coreml_update_state_99)[name = string("cast_89")]; tensor var_2691 = slice_by_index(begin = var_2691_begin_0, end = var_2691_end_0, end_mask = var_2691_end_mask_0, squeeze_mask = var_2691_squeeze_mask_0, x = value_cache_internal_tensor_assign_14_1)[name = string("op_2691")]; tensor var_2694_begin_0 = const()[name = string("op_2694_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2694_end_mask_0 = const()[name = string("op_2694_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2694 = slice_by_index(begin = var_2694_begin_0, end = concat_11, end_mask = var_2694_end_mask_0, x = var_2691)[name = string("op_2694")]; tensor var_2696_shape = shape(x = var_2689)[name = string("op_2696_shape")]; int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; int32 gather_248 = const()[name = string("gather_248"), val = int32(2)]; int32 select_249 = const()[name = string("select_249"), val = int32(2)]; int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; int32 gather_249 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249, validate_indices = gather_249_validate_indices_0, x = var_2696_shape)[name = string("gather_249")]; int32 gather_250 = const()[name = string("gather_250"), val = int32(128)]; tensor var_2703_axes_0 = const()[name = string("op_2703_axes_0"), val = tensor([2])]; tensor var_2703 = expand_dims(axes = var_2703_axes_0, x = var_2689)[name = string("op_2703")]; tensor shape_277 = shape(x = var_2703)[name = string("shape_277")]; int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_100, gather_249, gather_250))[name = string("concat_260")]; tensor real_div_26 = real_div(x = concat_260, y = shape_277)[name = string("real_div_26")]; tensor hidden_states_401 = tile(reps = real_div_26, x = var_2703)[name = string("hidden_states_401")]; tensor concat_261x = const()[name = string("concat_261x"), val = tensor([1, 16, -1, 128])]; tensor key_states_55 = reshape(shape = concat_261x, x = hidden_states_401)[name = string("key_states_55")]; tensor var_2713_shape = shape(x = var_2694)[name = string("op_2713_shape")]; int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; int32 gather_252 = const()[name = string("gather_252"), val = int32(2)]; int32 select_253 = const()[name = string("select_253"), val = int32(2)]; int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; int32 gather_253 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253, validate_indices = gather_253_validate_indices_0, x = var_2713_shape)[name = string("gather_253")]; int32 gather_254 = const()[name = string("gather_254"), val = int32(128)]; tensor var_2720_axes_0 = const()[name = string("op_2720_axes_0"), val = tensor([2])]; tensor var_2720 = expand_dims(axes = var_2720_axes_0, x = var_2694)[name = string("op_2720")]; tensor shape_282 = shape(x = var_2720)[name = string("shape_282")]; int32 concat_262_axis_0 = const()[name = string("concat_262_axis_0"), val = int32(0)]; bool concat_262_interleave_0 = const()[name = string("concat_262_interleave_0"), val = bool(false)]; tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_100, gather_253, gather_254))[name = string("concat_262")]; tensor real_div_27 = real_div(x = concat_262, y = shape_282)[name = string("real_div_27")]; tensor hidden_states_405 = tile(reps = real_div_27, x = var_2720)[name = string("hidden_states_405")]; tensor concat_263x = const()[name = string("concat_263x"), val = tensor([1, 16, -1, 128])]; tensor value_states_55 = reshape(shape = concat_263x, x = hidden_states_405)[name = string("value_states_55")]; tensor var_2730_shape = shape(x = key_states_55)[name = string("op_2730_shape")]; int32 select_255 = const()[name = string("select_255"), val = int32(2)]; int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; int32 gather_255 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255, validate_indices = gather_255_validate_indices_0, x = var_2730_shape)[name = string("gather_255")]; int32 concat_264_values0_0 = const()[name = string("concat_264_values0_0"), val = int32(1)]; int32 concat_264_values1_0 = const()[name = string("concat_264_values1_0"), val = int32(1)]; int32 concat_264_values2_0 = const()[name = string("concat_264_values2_0"), val = int32(0)]; int32 concat_264_axis_0 = const()[name = string("concat_264_axis_0"), val = int32(0)]; bool concat_264_interleave_0 = const()[name = string("concat_264_interleave_0"), val = bool(false)]; tensor concat_264 = concat(axis = concat_264_axis_0, interleave = concat_264_interleave_0, values = (concat_264_values0_0, concat_264_values1_0, concat_264_values2_0, gather_255))[name = string("concat_264")]; tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_29 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_264, end_mask = causal_mask_29_end_mask_0, x = cast_0)[name = string("causal_mask_29")]; tensor attn_output_53 = scaled_dot_product_attention(attn_mask = causal_mask_29, key = key_states_55, query = query_states_55, value = value_states_55)[name = string("attn_output_53")]; tensor var_2736_perm_0 = const()[name = string("op_2736_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 2048])]; tensor var_2736 = transpose(perm = var_2736_perm_0, x = attn_output_53)[name = string("transpose_88")]; tensor input_105 = reshape(shape = concat_265x, x = var_2736)[name = string("input_105")]; tensor linear_94 = linear(bias = linear_3_bias_0, weight = model_model_layers_13_self_attn_o_proj_weight_quantized, x = input_105)[name = string("linear_94")]; tensor hidden_states_409 = add(x = hidden_states_389, y = linear_94)[name = string("hidden_states_409")]; fp32 var_94_promoted_27 = const()[name = string("op_94_promoted_27"), val = fp32(0x1p+1)]; tensor var_2745 = pow(x = hidden_states_409, y = var_94_promoted_27)[name = string("op_2745")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2745)[name = string("variance_55")]; fp32 var_2748 = const()[name = string("op_2748"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2749 = add(x = variance_55, y = var_2748)[name = string("op_2749")]; fp32 var_2750_epsilon_0 = const()[name = string("op_2750_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2750 = rsqrt(epsilon = var_2750_epsilon_0, x = var_2749)[name = string("op_2750")]; tensor hidden_states_413 = mul(x = hidden_states_409, y = var_2750)[name = string("hidden_states_413")]; tensor input_107 = mul(x = model_model_layers_13_post_attention_layernorm_weight, y = hidden_states_413)[name = string("input_107")]; tensor linear_95 = linear(bias = linear_4_bias_0, weight = model_model_layers_13_mlp_gate_proj_weight_quantized, x = input_107)[name = string("linear_95")]; tensor var_2762 = silu(x = linear_95)[name = string("op_2762")]; tensor linear_96 = linear(bias = linear_4_bias_0, weight = model_model_layers_13_mlp_up_proj_weight_quantized, x = input_107)[name = string("linear_96")]; tensor input_111 = mul(x = var_2762, y = linear_96)[name = string("input_111")]; tensor linear_97 = linear(bias = linear_3_bias_0, weight = model_model_layers_13_mlp_down_proj_weight_quantized, x = input_111)[name = string("linear_97")]; tensor hidden_states_419 = add(x = hidden_states_409, y = linear_97)[name = string("hidden_states_419")]; fp32 var_94_promoted_28 = const()[name = string("op_94_promoted_28"), val = fp32(0x1p+1)]; tensor var_2775 = pow(x = hidden_states_419, y = var_94_promoted_28)[name = string("op_2775")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2775)[name = string("variance_57")]; fp32 var_2778 = const()[name = string("op_2778"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2779 = add(x = variance_57, y = var_2778)[name = string("op_2779")]; fp32 var_2780_epsilon_0 = const()[name = string("op_2780_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2780 = rsqrt(epsilon = var_2780_epsilon_0, x = var_2779)[name = string("op_2780")]; tensor hidden_states_423 = mul(x = hidden_states_419, y = var_2780)[name = string("hidden_states_423")]; tensor hidden_states_427 = mul(x = model_model_layers_14_input_layernorm_weight, y = hidden_states_423)[name = string("hidden_states_427")]; tensor linear_98 = linear(bias = model_model_layers_14_self_attn_q_proj_bias, weight = model_model_layers_14_self_attn_q_proj_weight_quantized, x = hidden_states_427)[name = string("linear_98")]; tensor linear_99 = linear(bias = model_model_layers_14_self_attn_k_proj_bias, weight = model_model_layers_14_self_attn_k_proj_weight_quantized, x = hidden_states_427)[name = string("linear_99")]; tensor linear_100 = linear(bias = model_model_layers_14_self_attn_v_proj_bias, weight = model_model_layers_14_self_attn_v_proj_weight_quantized, x = hidden_states_427)[name = string("linear_100")]; tensor concat_266x = const()[name = string("concat_266x"), val = tensor([1, -1, 16, 128])]; tensor var_2803 = reshape(shape = concat_266x, x = linear_98)[name = string("op_2803")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 2, 128])]; tensor var_2806 = reshape(shape = concat_267x, x = linear_99)[name = string("op_2806")]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 2, 128])]; tensor var_2809 = reshape(shape = concat_268x, x = linear_100)[name = string("op_2809")]; tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_29 = transpose(perm = q_29_perm_0, x = var_2803)[name = string("transpose_87")]; tensor var_2813 = mul(x = q_29, y = cos_7)[name = string("op_2813")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29)[name = string("x2_57")]; fp32 const_31_promoted = const()[name = string("const_31_promoted"), val = fp32(-0x1p+0)]; tensor var_2824 = mul(x = x2_57, y = const_31_promoted)[name = string("op_2824")]; bool var_2826_interleave_0 = const()[name = string("op_2826_interleave_0"), val = bool(false)]; tensor var_2826 = concat(axis = var_88, interleave = var_2826_interleave_0, values = (var_2824, x1_57))[name = string("op_2826")]; tensor var_2827 = mul(x = var_2826, y = sin_7)[name = string("op_2827")]; tensor query_states_59 = add(x = var_2813, y = var_2827)[name = string("query_states_59")]; tensor k_29 = transpose(perm = k_29_perm_0, x = var_2806)[name = string("transpose_86")]; tensor var_2829 = mul(x = k_29, y = cos_7)[name = string("op_2829")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29)[name = string("x2_59")]; fp32 const_32_promoted = const()[name = string("const_32_promoted"), val = fp32(-0x1p+0)]; tensor var_2840 = mul(x = x2_59, y = const_32_promoted)[name = string("op_2840")]; bool var_2842_interleave_0 = const()[name = string("op_2842_interleave_0"), val = bool(false)]; tensor var_2842 = concat(axis = var_88, interleave = var_2842_interleave_0, values = (var_2840, x1_59))[name = string("op_2842")]; tensor var_2843 = mul(x = var_2842, y = sin_7)[name = string("op_2843")]; tensor k_state_29 = add(x = var_2829, y = var_2843)[name = string("k_state_29")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor concat_271_values0_0 = const()[name = string("concat_271_values0_0"), val = tensor([14])]; int32 concat_271_axis_0 = const()[name = string("concat_271_axis_0"), val = int32(0)]; bool concat_271_interleave_0 = const()[name = string("concat_271_interleave_0"), val = bool(false)]; tensor concat_271 = concat(axis = concat_271_axis_0, interleave = concat_271_interleave_0, values = (concat_271_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_271")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29, x = key_cache_internal_tensor_assign_14_1)[name = string("key_cache_internal_tensor_assign_15")]; string cast_209_dtype_0 = const()[name = string("cast_209_dtype_0"), val = string("fp16")]; tensor cast_209 = cast(dtype = cast_209_dtype_0, x = key_cache_internal_tensor_assign_15)[name = string("cast_88")]; write_state(data = cast_209, input = key_cache)[name = string("coreml_update_state_100_write_state")]; tensor coreml_update_state_100 = read_state(input = key_cache)[name = string("coreml_update_state_100")]; string key_cache_internal_tensor_assign_15_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_15_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_29 = transpose(perm = v_state_29_perm_0, x = var_2809)[name = string("transpose_85")]; tensor value_cache_internal_tensor_assign_15 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29, x = value_cache_internal_tensor_assign_14_1)[name = string("value_cache_internal_tensor_assign_15")]; string cast_210_dtype_0 = const()[name = string("cast_210_dtype_0"), val = string("fp16")]; tensor cast_210 = cast(dtype = cast_210_dtype_0, x = value_cache_internal_tensor_assign_15)[name = string("cast_87")]; write_state(data = cast_210, input = value_cache)[name = string("coreml_update_state_101_write_state")]; tensor coreml_update_state_101 = read_state(input = value_cache)[name = string("coreml_update_state_101")]; string value_cache_internal_tensor_assign_15_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_15_dtype_0"), val = string("fp32")]; tensor var_2866_begin_0 = const()[name = string("op_2866_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_2866_end_0 = const()[name = string("op_2866_end_0"), val = tensor([15, 1, 2, 2048, 128])]; tensor var_2866_end_mask_0 = const()[name = string("op_2866_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2866_squeeze_mask_0 = const()[name = string("op_2866_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_1 = cast(dtype = key_cache_internal_tensor_assign_15_dtype_0, x = coreml_update_state_100)[name = string("cast_86")]; tensor var_2866 = slice_by_index(begin = var_2866_begin_0, end = var_2866_end_0, end_mask = var_2866_end_mask_0, squeeze_mask = var_2866_squeeze_mask_0, x = key_cache_internal_tensor_assign_15_1)[name = string("op_2866")]; tensor var_2869_begin_0 = const()[name = string("op_2869_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2869_end_mask_0 = const()[name = string("op_2869_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2869 = slice_by_index(begin = var_2869_begin_0, end = concat_11, end_mask = var_2869_end_mask_0, x = var_2866)[name = string("op_2869")]; tensor var_2871_begin_0 = const()[name = string("op_2871_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_2871_end_0 = const()[name = string("op_2871_end_0"), val = tensor([15, 1, 2, 2048, 128])]; tensor var_2871_end_mask_0 = const()[name = string("op_2871_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2871_squeeze_mask_0 = const()[name = string("op_2871_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_15_1 = cast(dtype = value_cache_internal_tensor_assign_15_dtype_0, x = coreml_update_state_101)[name = string("cast_85")]; tensor var_2871 = slice_by_index(begin = var_2871_begin_0, end = var_2871_end_0, end_mask = var_2871_end_mask_0, squeeze_mask = var_2871_squeeze_mask_0, x = value_cache_internal_tensor_assign_15_1)[name = string("op_2871")]; tensor var_2874_begin_0 = const()[name = string("op_2874_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2874_end_mask_0 = const()[name = string("op_2874_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2874 = slice_by_index(begin = var_2874_begin_0, end = concat_11, end_mask = var_2874_end_mask_0, x = var_2871)[name = string("op_2874")]; tensor var_2876_shape = shape(x = var_2869)[name = string("op_2876_shape")]; int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; int32 gather_266 = const()[name = string("gather_266"), val = int32(2)]; int32 select_267 = const()[name = string("select_267"), val = int32(2)]; int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; int32 gather_267 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267, validate_indices = gather_267_validate_indices_0, x = var_2876_shape)[name = string("gather_267")]; int32 gather_268 = const()[name = string("gather_268"), val = int32(128)]; tensor var_2883_axes_0 = const()[name = string("op_2883_axes_0"), val = tensor([2])]; tensor var_2883 = expand_dims(axes = var_2883_axes_0, x = var_2869)[name = string("op_2883")]; tensor shape_297 = shape(x = var_2883)[name = string("shape_297")]; int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_100, gather_267, gather_268))[name = string("concat_279")]; tensor real_div_28 = real_div(x = concat_279, y = shape_297)[name = string("real_div_28")]; tensor hidden_states_431 = tile(reps = real_div_28, x = var_2883)[name = string("hidden_states_431")]; tensor concat_280x = const()[name = string("concat_280x"), val = tensor([1, 16, -1, 128])]; tensor key_states_59 = reshape(shape = concat_280x, x = hidden_states_431)[name = string("key_states_59")]; tensor var_2893_shape = shape(x = var_2874)[name = string("op_2893_shape")]; int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; int32 gather_270 = const()[name = string("gather_270"), val = int32(2)]; int32 select_271 = const()[name = string("select_271"), val = int32(2)]; int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; int32 gather_271 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271, validate_indices = gather_271_validate_indices_0, x = var_2893_shape)[name = string("gather_271")]; int32 gather_272 = const()[name = string("gather_272"), val = int32(128)]; tensor var_2900_axes_0 = const()[name = string("op_2900_axes_0"), val = tensor([2])]; tensor var_2900 = expand_dims(axes = var_2900_axes_0, x = var_2874)[name = string("op_2900")]; tensor shape_302 = shape(x = var_2900)[name = string("shape_302")]; int32 concat_281_axis_0 = const()[name = string("concat_281_axis_0"), val = int32(0)]; bool concat_281_interleave_0 = const()[name = string("concat_281_interleave_0"), val = bool(false)]; tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_100, gather_271, gather_272))[name = string("concat_281")]; tensor real_div_29 = real_div(x = concat_281, y = shape_302)[name = string("real_div_29")]; tensor hidden_states_435 = tile(reps = real_div_29, x = var_2900)[name = string("hidden_states_435")]; tensor concat_282x = const()[name = string("concat_282x"), val = tensor([1, 16, -1, 128])]; tensor value_states_59 = reshape(shape = concat_282x, x = hidden_states_435)[name = string("value_states_59")]; tensor var_2910_shape = shape(x = key_states_59)[name = string("op_2910_shape")]; int32 select_273 = const()[name = string("select_273"), val = int32(2)]; int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; int32 gather_273 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273, validate_indices = gather_273_validate_indices_0, x = var_2910_shape)[name = string("gather_273")]; int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; int32 concat_283_values2_0 = const()[name = string("concat_283_values2_0"), val = int32(0)]; int32 concat_283_axis_0 = const()[name = string("concat_283_axis_0"), val = int32(0)]; bool concat_283_interleave_0 = const()[name = string("concat_283_interleave_0"), val = bool(false)]; tensor concat_283 = concat(axis = concat_283_axis_0, interleave = concat_283_interleave_0, values = (concat_283_values0_0, concat_283_values1_0, concat_283_values2_0, gather_273))[name = string("concat_283")]; tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_31 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_283, end_mask = causal_mask_31_end_mask_0, x = cast_0)[name = string("causal_mask_31")]; tensor attn_output_57 = scaled_dot_product_attention(attn_mask = causal_mask_31, key = key_states_59, query = query_states_59, value = value_states_59)[name = string("attn_output_57")]; tensor var_2916_perm_0 = const()[name = string("op_2916_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_284x = const()[name = string("concat_284x"), val = tensor([1, -1, 2048])]; tensor var_2916 = transpose(perm = var_2916_perm_0, x = attn_output_57)[name = string("transpose_84")]; tensor input_113 = reshape(shape = concat_284x, x = var_2916)[name = string("input_113")]; tensor linear_101 = linear(bias = linear_3_bias_0, weight = model_model_layers_14_self_attn_o_proj_weight_quantized, x = input_113)[name = string("linear_101")]; tensor hidden_states_439 = add(x = hidden_states_419, y = linear_101)[name = string("hidden_states_439")]; fp32 var_94_promoted_29 = const()[name = string("op_94_promoted_29"), val = fp32(0x1p+1)]; tensor var_2925 = pow(x = hidden_states_439, y = var_94_promoted_29)[name = string("op_2925")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor variance_59 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2925)[name = string("variance_59")]; fp32 var_2928 = const()[name = string("op_2928"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2929 = add(x = variance_59, y = var_2928)[name = string("op_2929")]; fp32 var_2930_epsilon_0 = const()[name = string("op_2930_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2930 = rsqrt(epsilon = var_2930_epsilon_0, x = var_2929)[name = string("op_2930")]; tensor hidden_states_443 = mul(x = hidden_states_439, y = var_2930)[name = string("hidden_states_443")]; tensor input_115 = mul(x = model_model_layers_14_post_attention_layernorm_weight, y = hidden_states_443)[name = string("input_115")]; tensor linear_102 = linear(bias = linear_4_bias_0, weight = model_model_layers_14_mlp_gate_proj_weight_quantized, x = input_115)[name = string("linear_102")]; tensor var_2942 = silu(x = linear_102)[name = string("op_2942")]; tensor linear_103 = linear(bias = linear_4_bias_0, weight = model_model_layers_14_mlp_up_proj_weight_quantized, x = input_115)[name = string("linear_103")]; tensor input_119 = mul(x = var_2942, y = linear_103)[name = string("input_119")]; tensor linear_104 = linear(bias = linear_3_bias_0, weight = model_model_layers_14_mlp_down_proj_weight_quantized, x = input_119)[name = string("linear_104")]; tensor hidden_states_449 = add(x = hidden_states_439, y = linear_104)[name = string("hidden_states_449")]; fp32 var_94_promoted_30 = const()[name = string("op_94_promoted_30"), val = fp32(0x1p+1)]; tensor var_2955 = pow(x = hidden_states_449, y = var_94_promoted_30)[name = string("op_2955")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor variance_61 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2955)[name = string("variance_61")]; fp32 var_2958 = const()[name = string("op_2958"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2959 = add(x = variance_61, y = var_2958)[name = string("op_2959")]; fp32 var_2960_epsilon_0 = const()[name = string("op_2960_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2960 = rsqrt(epsilon = var_2960_epsilon_0, x = var_2959)[name = string("op_2960")]; tensor hidden_states_453 = mul(x = hidden_states_449, y = var_2960)[name = string("hidden_states_453")]; tensor hidden_states_457 = mul(x = model_model_layers_15_input_layernorm_weight, y = hidden_states_453)[name = string("hidden_states_457")]; tensor linear_105 = linear(bias = model_model_layers_15_self_attn_q_proj_bias, weight = model_model_layers_15_self_attn_q_proj_weight_quantized, x = hidden_states_457)[name = string("linear_105")]; tensor linear_106 = linear(bias = model_model_layers_15_self_attn_k_proj_bias, weight = model_model_layers_15_self_attn_k_proj_weight_quantized, x = hidden_states_457)[name = string("linear_106")]; tensor linear_107 = linear(bias = model_model_layers_15_self_attn_v_proj_bias, weight = model_model_layers_15_self_attn_v_proj_weight_quantized, x = hidden_states_457)[name = string("linear_107")]; tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 16, 128])]; tensor var_2983 = reshape(shape = concat_285x, x = linear_105)[name = string("op_2983")]; tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 2, 128])]; tensor var_2986 = reshape(shape = concat_286x, x = linear_106)[name = string("op_2986")]; tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 2, 128])]; tensor var_2989 = reshape(shape = concat_287x, x = linear_107)[name = string("op_2989")]; tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_31 = transpose(perm = q_31_perm_0, x = var_2983)[name = string("transpose_83")]; tensor var_2993 = mul(x = q_31, y = cos_7)[name = string("op_2993")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31)[name = string("x2_61")]; fp32 const_33_promoted = const()[name = string("const_33_promoted"), val = fp32(-0x1p+0)]; tensor var_3004 = mul(x = x2_61, y = const_33_promoted)[name = string("op_3004")]; bool var_3006_interleave_0 = const()[name = string("op_3006_interleave_0"), val = bool(false)]; tensor var_3006 = concat(axis = var_88, interleave = var_3006_interleave_0, values = (var_3004, x1_61))[name = string("op_3006")]; tensor var_3007 = mul(x = var_3006, y = sin_7)[name = string("op_3007")]; tensor query_states_63 = add(x = var_2993, y = var_3007)[name = string("query_states_63")]; tensor k_31 = transpose(perm = k_31_perm_0, x = var_2986)[name = string("transpose_82")]; tensor var_3009 = mul(x = k_31, y = cos_7)[name = string("op_3009")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31)[name = string("x2_63")]; fp32 const_34_promoted = const()[name = string("const_34_promoted"), val = fp32(-0x1p+0)]; tensor var_3020 = mul(x = x2_63, y = const_34_promoted)[name = string("op_3020")]; bool var_3022_interleave_0 = const()[name = string("op_3022_interleave_0"), val = bool(false)]; tensor var_3022 = concat(axis = var_88, interleave = var_3022_interleave_0, values = (var_3020, x1_63))[name = string("op_3022")]; tensor var_3023 = mul(x = var_3022, y = sin_7)[name = string("op_3023")]; tensor k_state_31 = add(x = var_3009, y = var_3023)[name = string("k_state_31")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([15])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_290")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31, x = key_cache_internal_tensor_assign_15_1)[name = string("key_cache_internal_tensor_assign_16")]; string cast_223_dtype_0 = const()[name = string("cast_223_dtype_0"), val = string("fp16")]; tensor cast_223 = cast(dtype = cast_223_dtype_0, x = key_cache_internal_tensor_assign_16)[name = string("cast_84")]; write_state(data = cast_223, input = key_cache)[name = string("coreml_update_state_102_write_state")]; tensor coreml_update_state_102 = read_state(input = key_cache)[name = string("coreml_update_state_102")]; string key_cache_internal_tensor_assign_16_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_16_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_31 = transpose(perm = v_state_31_perm_0, x = var_2989)[name = string("transpose_81")]; tensor value_cache_internal_tensor_assign_16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31, x = value_cache_internal_tensor_assign_15_1)[name = string("value_cache_internal_tensor_assign_16")]; string cast_224_dtype_0 = const()[name = string("cast_224_dtype_0"), val = string("fp16")]; tensor cast_224 = cast(dtype = cast_224_dtype_0, x = value_cache_internal_tensor_assign_16)[name = string("cast_83")]; write_state(data = cast_224, input = value_cache)[name = string("coreml_update_state_103_write_state")]; tensor coreml_update_state_103 = read_state(input = value_cache)[name = string("coreml_update_state_103")]; string value_cache_internal_tensor_assign_16_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_16_dtype_0"), val = string("fp32")]; tensor var_3046_begin_0 = const()[name = string("op_3046_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_3046_end_0 = const()[name = string("op_3046_end_0"), val = tensor([16, 1, 2, 2048, 128])]; tensor var_3046_end_mask_0 = const()[name = string("op_3046_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3046_squeeze_mask_0 = const()[name = string("op_3046_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_1 = cast(dtype = key_cache_internal_tensor_assign_16_dtype_0, x = coreml_update_state_102)[name = string("cast_82")]; tensor var_3046 = slice_by_index(begin = var_3046_begin_0, end = var_3046_end_0, end_mask = var_3046_end_mask_0, squeeze_mask = var_3046_squeeze_mask_0, x = key_cache_internal_tensor_assign_16_1)[name = string("op_3046")]; tensor var_3049_begin_0 = const()[name = string("op_3049_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3049_end_mask_0 = const()[name = string("op_3049_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3049 = slice_by_index(begin = var_3049_begin_0, end = concat_11, end_mask = var_3049_end_mask_0, x = var_3046)[name = string("op_3049")]; tensor var_3051_begin_0 = const()[name = string("op_3051_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_3051_end_0 = const()[name = string("op_3051_end_0"), val = tensor([16, 1, 2, 2048, 128])]; tensor var_3051_end_mask_0 = const()[name = string("op_3051_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3051_squeeze_mask_0 = const()[name = string("op_3051_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_16_1 = cast(dtype = value_cache_internal_tensor_assign_16_dtype_0, x = coreml_update_state_103)[name = string("cast_81")]; tensor var_3051 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, squeeze_mask = var_3051_squeeze_mask_0, x = value_cache_internal_tensor_assign_16_1)[name = string("op_3051")]; tensor var_3054_begin_0 = const()[name = string("op_3054_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3054_end_mask_0 = const()[name = string("op_3054_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3054 = slice_by_index(begin = var_3054_begin_0, end = concat_11, end_mask = var_3054_end_mask_0, x = var_3051)[name = string("op_3054")]; tensor var_3056_shape = shape(x = var_3049)[name = string("op_3056_shape")]; int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; int32 gather_284 = const()[name = string("gather_284"), val = int32(2)]; int32 select_285 = const()[name = string("select_285"), val = int32(2)]; int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; int32 gather_285 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285, validate_indices = gather_285_validate_indices_0, x = var_3056_shape)[name = string("gather_285")]; int32 gather_286 = const()[name = string("gather_286"), val = int32(128)]; tensor var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor([2])]; tensor var_3063 = expand_dims(axes = var_3063_axes_0, x = var_3049)[name = string("op_3063")]; tensor shape_317 = shape(x = var_3063)[name = string("shape_317")]; int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_100, gather_285, gather_286))[name = string("concat_298")]; tensor real_div_30 = real_div(x = concat_298, y = shape_317)[name = string("real_div_30")]; tensor hidden_states_461 = tile(reps = real_div_30, x = var_3063)[name = string("hidden_states_461")]; tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, 16, -1, 128])]; tensor key_states_63 = reshape(shape = concat_299x, x = hidden_states_461)[name = string("key_states_63")]; tensor var_3073_shape = shape(x = var_3054)[name = string("op_3073_shape")]; int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; int32 gather_288 = const()[name = string("gather_288"), val = int32(2)]; int32 select_289 = const()[name = string("select_289"), val = int32(2)]; int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; int32 gather_289 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289, validate_indices = gather_289_validate_indices_0, x = var_3073_shape)[name = string("gather_289")]; int32 gather_290 = const()[name = string("gather_290"), val = int32(128)]; tensor var_3080_axes_0 = const()[name = string("op_3080_axes_0"), val = tensor([2])]; tensor var_3080 = expand_dims(axes = var_3080_axes_0, x = var_3054)[name = string("op_3080")]; tensor shape_322 = shape(x = var_3080)[name = string("shape_322")]; int32 concat_300_axis_0 = const()[name = string("concat_300_axis_0"), val = int32(0)]; bool concat_300_interleave_0 = const()[name = string("concat_300_interleave_0"), val = bool(false)]; tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_100, gather_289, gather_290))[name = string("concat_300")]; tensor real_div_31 = real_div(x = concat_300, y = shape_322)[name = string("real_div_31")]; tensor hidden_states_465 = tile(reps = real_div_31, x = var_3080)[name = string("hidden_states_465")]; tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, 16, -1, 128])]; tensor value_states_63 = reshape(shape = concat_301x, x = hidden_states_465)[name = string("value_states_63")]; tensor var_3090_shape = shape(x = key_states_63)[name = string("op_3090_shape")]; int32 select_291 = const()[name = string("select_291"), val = int32(2)]; int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; int32 gather_291 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291, validate_indices = gather_291_validate_indices_0, x = var_3090_shape)[name = string("gather_291")]; int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(1)]; int32 concat_302_values1_0 = const()[name = string("concat_302_values1_0"), val = int32(1)]; int32 concat_302_values2_0 = const()[name = string("concat_302_values2_0"), val = int32(0)]; int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, concat_302_values1_0, concat_302_values2_0, gather_291))[name = string("concat_302")]; tensor causal_mask_33_begin_0 = const()[name = string("causal_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_33_end_mask_0 = const()[name = string("causal_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_33 = slice_by_index(begin = causal_mask_33_begin_0, end = concat_302, end_mask = causal_mask_33_end_mask_0, x = cast_0)[name = string("causal_mask_33")]; tensor attn_output_61 = scaled_dot_product_attention(attn_mask = causal_mask_33, key = key_states_63, query = query_states_63, value = value_states_63)[name = string("attn_output_61")]; tensor var_3096_perm_0 = const()[name = string("op_3096_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 2048])]; tensor var_3096 = transpose(perm = var_3096_perm_0, x = attn_output_61)[name = string("transpose_80")]; tensor input_121 = reshape(shape = concat_303x, x = var_3096)[name = string("input_121")]; tensor linear_108 = linear(bias = linear_3_bias_0, weight = model_model_layers_15_self_attn_o_proj_weight_quantized, x = input_121)[name = string("linear_108")]; tensor hidden_states_469 = add(x = hidden_states_449, y = linear_108)[name = string("hidden_states_469")]; fp32 var_94_promoted_31 = const()[name = string("op_94_promoted_31"), val = fp32(0x1p+1)]; tensor var_3105 = pow(x = hidden_states_469, y = var_94_promoted_31)[name = string("op_3105")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3105)[name = string("variance_63")]; fp32 var_3108 = const()[name = string("op_3108"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3109 = add(x = variance_63, y = var_3108)[name = string("op_3109")]; fp32 var_3110_epsilon_0 = const()[name = string("op_3110_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3110 = rsqrt(epsilon = var_3110_epsilon_0, x = var_3109)[name = string("op_3110")]; tensor hidden_states_473 = mul(x = hidden_states_469, y = var_3110)[name = string("hidden_states_473")]; tensor input_123 = mul(x = model_model_layers_15_post_attention_layernorm_weight, y = hidden_states_473)[name = string("input_123")]; tensor linear_109 = linear(bias = linear_4_bias_0, weight = model_model_layers_15_mlp_gate_proj_weight_quantized, x = input_123)[name = string("linear_109")]; tensor var_3122 = silu(x = linear_109)[name = string("op_3122")]; tensor linear_110 = linear(bias = linear_4_bias_0, weight = model_model_layers_15_mlp_up_proj_weight_quantized, x = input_123)[name = string("linear_110")]; tensor input_127 = mul(x = var_3122, y = linear_110)[name = string("input_127")]; tensor linear_111 = linear(bias = linear_3_bias_0, weight = model_model_layers_15_mlp_down_proj_weight_quantized, x = input_127)[name = string("linear_111")]; tensor hidden_states_479 = add(x = hidden_states_469, y = linear_111)[name = string("hidden_states_479")]; fp32 var_94_promoted_32 = const()[name = string("op_94_promoted_32"), val = fp32(0x1p+1)]; tensor var_3135 = pow(x = hidden_states_479, y = var_94_promoted_32)[name = string("op_3135")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3135)[name = string("variance_65")]; fp32 var_3138 = const()[name = string("op_3138"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3139 = add(x = variance_65, y = var_3138)[name = string("op_3139")]; fp32 var_3140_epsilon_0 = const()[name = string("op_3140_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3140 = rsqrt(epsilon = var_3140_epsilon_0, x = var_3139)[name = string("op_3140")]; tensor hidden_states_483 = mul(x = hidden_states_479, y = var_3140)[name = string("hidden_states_483")]; tensor hidden_states_487 = mul(x = model_model_layers_16_input_layernorm_weight, y = hidden_states_483)[name = string("hidden_states_487")]; tensor linear_112 = linear(bias = model_model_layers_16_self_attn_q_proj_bias, weight = model_model_layers_16_self_attn_q_proj_weight_quantized, x = hidden_states_487)[name = string("linear_112")]; tensor linear_113 = linear(bias = model_model_layers_16_self_attn_k_proj_bias, weight = model_model_layers_16_self_attn_k_proj_weight_quantized, x = hidden_states_487)[name = string("linear_113")]; tensor linear_114 = linear(bias = model_model_layers_16_self_attn_v_proj_bias, weight = model_model_layers_16_self_attn_v_proj_weight_quantized, x = hidden_states_487)[name = string("linear_114")]; tensor concat_304x = const()[name = string("concat_304x"), val = tensor([1, -1, 16, 128])]; tensor var_3163 = reshape(shape = concat_304x, x = linear_112)[name = string("op_3163")]; tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_305x = const()[name = string("concat_305x"), val = tensor([1, -1, 2, 128])]; tensor var_3166 = reshape(shape = concat_305x, x = linear_113)[name = string("op_3166")]; tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_306x = const()[name = string("concat_306x"), val = tensor([1, -1, 2, 128])]; tensor var_3169 = reshape(shape = concat_306x, x = linear_114)[name = string("op_3169")]; tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_33 = transpose(perm = q_33_perm_0, x = var_3163)[name = string("transpose_79")]; tensor var_3173 = mul(x = q_33, y = cos_7)[name = string("op_3173")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33)[name = string("x2_65")]; fp32 const_35_promoted = const()[name = string("const_35_promoted"), val = fp32(-0x1p+0)]; tensor var_3184 = mul(x = x2_65, y = const_35_promoted)[name = string("op_3184")]; bool var_3186_interleave_0 = const()[name = string("op_3186_interleave_0"), val = bool(false)]; tensor var_3186 = concat(axis = var_88, interleave = var_3186_interleave_0, values = (var_3184, x1_65))[name = string("op_3186")]; tensor var_3187 = mul(x = var_3186, y = sin_7)[name = string("op_3187")]; tensor query_states_67 = add(x = var_3173, y = var_3187)[name = string("query_states_67")]; tensor k_33 = transpose(perm = k_33_perm_0, x = var_3166)[name = string("transpose_78")]; tensor var_3189 = mul(x = k_33, y = cos_7)[name = string("op_3189")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33)[name = string("x2_67")]; fp32 const_36_promoted = const()[name = string("const_36_promoted"), val = fp32(-0x1p+0)]; tensor var_3200 = mul(x = x2_67, y = const_36_promoted)[name = string("op_3200")]; bool var_3202_interleave_0 = const()[name = string("op_3202_interleave_0"), val = bool(false)]; tensor var_3202 = concat(axis = var_88, interleave = var_3202_interleave_0, values = (var_3200, x1_67))[name = string("op_3202")]; tensor var_3203 = mul(x = var_3202, y = sin_7)[name = string("op_3203")]; tensor k_state_33 = add(x = var_3189, y = var_3203)[name = string("k_state_33")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor concat_309_values0_0 = const()[name = string("concat_309_values0_0"), val = tensor([16])]; int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (concat_309_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_309")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17 = slice_update(begin = concat_309, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33, x = key_cache_internal_tensor_assign_16_1)[name = string("key_cache_internal_tensor_assign_17")]; string cast_237_dtype_0 = const()[name = string("cast_237_dtype_0"), val = string("fp16")]; tensor cast_237 = cast(dtype = cast_237_dtype_0, x = key_cache_internal_tensor_assign_17)[name = string("cast_80")]; write_state(data = cast_237, input = key_cache)[name = string("coreml_update_state_104_write_state")]; tensor coreml_update_state_104 = read_state(input = key_cache)[name = string("coreml_update_state_104")]; string key_cache_internal_tensor_assign_17_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_17_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_33 = transpose(perm = v_state_33_perm_0, x = var_3169)[name = string("transpose_77")]; tensor value_cache_internal_tensor_assign_17 = slice_update(begin = concat_309, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33, x = value_cache_internal_tensor_assign_16_1)[name = string("value_cache_internal_tensor_assign_17")]; string cast_238_dtype_0 = const()[name = string("cast_238_dtype_0"), val = string("fp16")]; tensor cast_238 = cast(dtype = cast_238_dtype_0, x = value_cache_internal_tensor_assign_17)[name = string("cast_79")]; write_state(data = cast_238, input = value_cache)[name = string("coreml_update_state_105_write_state")]; tensor coreml_update_state_105 = read_state(input = value_cache)[name = string("coreml_update_state_105")]; string value_cache_internal_tensor_assign_17_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_17_dtype_0"), val = string("fp32")]; tensor var_3226_begin_0 = const()[name = string("op_3226_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3226_end_0 = const()[name = string("op_3226_end_0"), val = tensor([17, 1, 2, 2048, 128])]; tensor var_3226_end_mask_0 = const()[name = string("op_3226_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3226_squeeze_mask_0 = const()[name = string("op_3226_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_1 = cast(dtype = key_cache_internal_tensor_assign_17_dtype_0, x = coreml_update_state_104)[name = string("cast_78")]; tensor var_3226 = slice_by_index(begin = var_3226_begin_0, end = var_3226_end_0, end_mask = var_3226_end_mask_0, squeeze_mask = var_3226_squeeze_mask_0, x = key_cache_internal_tensor_assign_17_1)[name = string("op_3226")]; tensor var_3229_begin_0 = const()[name = string("op_3229_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3229_end_mask_0 = const()[name = string("op_3229_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3229 = slice_by_index(begin = var_3229_begin_0, end = concat_11, end_mask = var_3229_end_mask_0, x = var_3226)[name = string("op_3229")]; tensor var_3231_begin_0 = const()[name = string("op_3231_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3231_end_0 = const()[name = string("op_3231_end_0"), val = tensor([17, 1, 2, 2048, 128])]; tensor var_3231_end_mask_0 = const()[name = string("op_3231_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3231_squeeze_mask_0 = const()[name = string("op_3231_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_17_1 = cast(dtype = value_cache_internal_tensor_assign_17_dtype_0, x = coreml_update_state_105)[name = string("cast_77")]; tensor var_3231 = slice_by_index(begin = var_3231_begin_0, end = var_3231_end_0, end_mask = var_3231_end_mask_0, squeeze_mask = var_3231_squeeze_mask_0, x = value_cache_internal_tensor_assign_17_1)[name = string("op_3231")]; tensor var_3234_begin_0 = const()[name = string("op_3234_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3234_end_mask_0 = const()[name = string("op_3234_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3234 = slice_by_index(begin = var_3234_begin_0, end = concat_11, end_mask = var_3234_end_mask_0, x = var_3231)[name = string("op_3234")]; tensor var_3236_shape = shape(x = var_3229)[name = string("op_3236_shape")]; int32 gather_301 = const()[name = string("gather_301"), val = int32(1)]; int32 gather_302 = const()[name = string("gather_302"), val = int32(2)]; int32 select_303 = const()[name = string("select_303"), val = int32(2)]; int32 gather_303_axis_0 = const()[name = string("gather_303_axis_0"), val = int32(0)]; int32 gather_303_batch_dims_0 = const()[name = string("gather_303_batch_dims_0"), val = int32(0)]; bool gather_303_validate_indices_0 = const()[name = string("gather_303_validate_indices_0"), val = bool(false)]; int32 gather_303 = gather(axis = gather_303_axis_0, batch_dims = gather_303_batch_dims_0, indices = select_303, validate_indices = gather_303_validate_indices_0, x = var_3236_shape)[name = string("gather_303")]; int32 gather_304 = const()[name = string("gather_304"), val = int32(128)]; tensor var_3243_axes_0 = const()[name = string("op_3243_axes_0"), val = tensor([2])]; tensor var_3243 = expand_dims(axes = var_3243_axes_0, x = var_3229)[name = string("op_3243")]; tensor shape_337 = shape(x = var_3243)[name = string("shape_337")]; int32 concat_317_axis_0 = const()[name = string("concat_317_axis_0"), val = int32(0)]; bool concat_317_interleave_0 = const()[name = string("concat_317_interleave_0"), val = bool(false)]; tensor concat_317 = concat(axis = concat_317_axis_0, interleave = concat_317_interleave_0, values = (gather_301, gather_302, var_100, gather_303, gather_304))[name = string("concat_317")]; tensor real_div_32 = real_div(x = concat_317, y = shape_337)[name = string("real_div_32")]; tensor hidden_states_491 = tile(reps = real_div_32, x = var_3243)[name = string("hidden_states_491")]; tensor concat_318x = const()[name = string("concat_318x"), val = tensor([1, 16, -1, 128])]; tensor key_states_67 = reshape(shape = concat_318x, x = hidden_states_491)[name = string("key_states_67")]; tensor var_3253_shape = shape(x = var_3234)[name = string("op_3253_shape")]; int32 gather_305 = const()[name = string("gather_305"), val = int32(1)]; int32 gather_306 = const()[name = string("gather_306"), val = int32(2)]; int32 select_307 = const()[name = string("select_307"), val = int32(2)]; int32 gather_307_axis_0 = const()[name = string("gather_307_axis_0"), val = int32(0)]; int32 gather_307_batch_dims_0 = const()[name = string("gather_307_batch_dims_0"), val = int32(0)]; bool gather_307_validate_indices_0 = const()[name = string("gather_307_validate_indices_0"), val = bool(false)]; int32 gather_307 = gather(axis = gather_307_axis_0, batch_dims = gather_307_batch_dims_0, indices = select_307, validate_indices = gather_307_validate_indices_0, x = var_3253_shape)[name = string("gather_307")]; int32 gather_308 = const()[name = string("gather_308"), val = int32(128)]; tensor var_3260_axes_0 = const()[name = string("op_3260_axes_0"), val = tensor([2])]; tensor var_3260 = expand_dims(axes = var_3260_axes_0, x = var_3234)[name = string("op_3260")]; tensor shape_342 = shape(x = var_3260)[name = string("shape_342")]; int32 concat_319_axis_0 = const()[name = string("concat_319_axis_0"), val = int32(0)]; bool concat_319_interleave_0 = const()[name = string("concat_319_interleave_0"), val = bool(false)]; tensor concat_319 = concat(axis = concat_319_axis_0, interleave = concat_319_interleave_0, values = (gather_305, gather_306, var_100, gather_307, gather_308))[name = string("concat_319")]; tensor real_div_33 = real_div(x = concat_319, y = shape_342)[name = string("real_div_33")]; tensor hidden_states_495 = tile(reps = real_div_33, x = var_3260)[name = string("hidden_states_495")]; tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, 16, -1, 128])]; tensor value_states_67 = reshape(shape = concat_320x, x = hidden_states_495)[name = string("value_states_67")]; tensor var_3270_shape = shape(x = key_states_67)[name = string("op_3270_shape")]; int32 select_309 = const()[name = string("select_309"), val = int32(2)]; int32 gather_309_axis_0 = const()[name = string("gather_309_axis_0"), val = int32(0)]; int32 gather_309_batch_dims_0 = const()[name = string("gather_309_batch_dims_0"), val = int32(0)]; bool gather_309_validate_indices_0 = const()[name = string("gather_309_validate_indices_0"), val = bool(false)]; int32 gather_309 = gather(axis = gather_309_axis_0, batch_dims = gather_309_batch_dims_0, indices = select_309, validate_indices = gather_309_validate_indices_0, x = var_3270_shape)[name = string("gather_309")]; int32 concat_321_values0_0 = const()[name = string("concat_321_values0_0"), val = int32(1)]; int32 concat_321_values1_0 = const()[name = string("concat_321_values1_0"), val = int32(1)]; int32 concat_321_values2_0 = const()[name = string("concat_321_values2_0"), val = int32(0)]; int32 concat_321_axis_0 = const()[name = string("concat_321_axis_0"), val = int32(0)]; bool concat_321_interleave_0 = const()[name = string("concat_321_interleave_0"), val = bool(false)]; tensor concat_321 = concat(axis = concat_321_axis_0, interleave = concat_321_interleave_0, values = (concat_321_values0_0, concat_321_values1_0, concat_321_values2_0, gather_309))[name = string("concat_321")]; tensor causal_mask_35_begin_0 = const()[name = string("causal_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_35_end_mask_0 = const()[name = string("causal_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_35 = slice_by_index(begin = causal_mask_35_begin_0, end = concat_321, end_mask = causal_mask_35_end_mask_0, x = cast_0)[name = string("causal_mask_35")]; tensor attn_output_65 = scaled_dot_product_attention(attn_mask = causal_mask_35, key = key_states_67, query = query_states_67, value = value_states_67)[name = string("attn_output_65")]; tensor var_3276_perm_0 = const()[name = string("op_3276_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 2048])]; tensor var_3276 = transpose(perm = var_3276_perm_0, x = attn_output_65)[name = string("transpose_76")]; tensor input_129 = reshape(shape = concat_322x, x = var_3276)[name = string("input_129")]; tensor linear_115 = linear(bias = linear_3_bias_0, weight = model_model_layers_16_self_attn_o_proj_weight_quantized, x = input_129)[name = string("linear_115")]; tensor hidden_states_499 = add(x = hidden_states_479, y = linear_115)[name = string("hidden_states_499")]; fp32 var_94_promoted_33 = const()[name = string("op_94_promoted_33"), val = fp32(0x1p+1)]; tensor var_3285 = pow(x = hidden_states_499, y = var_94_promoted_33)[name = string("op_3285")]; tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor variance_67 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_3285)[name = string("variance_67")]; fp32 var_3288 = const()[name = string("op_3288"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3289 = add(x = variance_67, y = var_3288)[name = string("op_3289")]; fp32 var_3290_epsilon_0 = const()[name = string("op_3290_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3290 = rsqrt(epsilon = var_3290_epsilon_0, x = var_3289)[name = string("op_3290")]; tensor hidden_states_503 = mul(x = hidden_states_499, y = var_3290)[name = string("hidden_states_503")]; tensor input_131 = mul(x = model_model_layers_16_post_attention_layernorm_weight, y = hidden_states_503)[name = string("input_131")]; tensor linear_116 = linear(bias = linear_4_bias_0, weight = model_model_layers_16_mlp_gate_proj_weight_quantized, x = input_131)[name = string("linear_116")]; tensor var_3302 = silu(x = linear_116)[name = string("op_3302")]; tensor linear_117 = linear(bias = linear_4_bias_0, weight = model_model_layers_16_mlp_up_proj_weight_quantized, x = input_131)[name = string("linear_117")]; tensor input_135 = mul(x = var_3302, y = linear_117)[name = string("input_135")]; tensor linear_118 = linear(bias = linear_3_bias_0, weight = model_model_layers_16_mlp_down_proj_weight_quantized, x = input_135)[name = string("linear_118")]; tensor hidden_states_509 = add(x = hidden_states_499, y = linear_118)[name = string("hidden_states_509")]; fp32 var_94_promoted_34 = const()[name = string("op_94_promoted_34"), val = fp32(0x1p+1)]; tensor var_3315 = pow(x = hidden_states_509, y = var_94_promoted_34)[name = string("op_3315")]; tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor variance_69 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_3315)[name = string("variance_69")]; fp32 var_3318 = const()[name = string("op_3318"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3319 = add(x = variance_69, y = var_3318)[name = string("op_3319")]; fp32 var_3320_epsilon_0 = const()[name = string("op_3320_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3320 = rsqrt(epsilon = var_3320_epsilon_0, x = var_3319)[name = string("op_3320")]; tensor hidden_states_513 = mul(x = hidden_states_509, y = var_3320)[name = string("hidden_states_513")]; tensor hidden_states_517 = mul(x = model_model_layers_17_input_layernorm_weight, y = hidden_states_513)[name = string("hidden_states_517")]; tensor linear_119 = linear(bias = model_model_layers_17_self_attn_q_proj_bias, weight = model_model_layers_17_self_attn_q_proj_weight_quantized, x = hidden_states_517)[name = string("linear_119")]; tensor linear_120 = linear(bias = model_model_layers_17_self_attn_k_proj_bias, weight = model_model_layers_17_self_attn_k_proj_weight_quantized, x = hidden_states_517)[name = string("linear_120")]; tensor linear_121 = linear(bias = model_model_layers_17_self_attn_v_proj_bias, weight = model_model_layers_17_self_attn_v_proj_weight_quantized, x = hidden_states_517)[name = string("linear_121")]; tensor concat_323x = const()[name = string("concat_323x"), val = tensor([1, -1, 16, 128])]; tensor var_3343 = reshape(shape = concat_323x, x = linear_119)[name = string("op_3343")]; tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_324x = const()[name = string("concat_324x"), val = tensor([1, -1, 2, 128])]; tensor var_3346 = reshape(shape = concat_324x, x = linear_120)[name = string("op_3346")]; tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 2, 128])]; tensor var_3349 = reshape(shape = concat_325x, x = linear_121)[name = string("op_3349")]; tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_35 = transpose(perm = q_35_perm_0, x = var_3343)[name = string("transpose_75")]; tensor var_3353 = mul(x = q_35, y = cos_7)[name = string("op_3353")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35)[name = string("x2_69")]; fp32 const_37_promoted = const()[name = string("const_37_promoted"), val = fp32(-0x1p+0)]; tensor var_3364 = mul(x = x2_69, y = const_37_promoted)[name = string("op_3364")]; bool var_3366_interleave_0 = const()[name = string("op_3366_interleave_0"), val = bool(false)]; tensor var_3366 = concat(axis = var_88, interleave = var_3366_interleave_0, values = (var_3364, x1_69))[name = string("op_3366")]; tensor var_3367 = mul(x = var_3366, y = sin_7)[name = string("op_3367")]; tensor query_states_71 = add(x = var_3353, y = var_3367)[name = string("query_states_71")]; tensor k_35 = transpose(perm = k_35_perm_0, x = var_3346)[name = string("transpose_74")]; tensor var_3369 = mul(x = k_35, y = cos_7)[name = string("op_3369")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35)[name = string("x1_71")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35)[name = string("x2_71")]; fp32 const_38_promoted = const()[name = string("const_38_promoted"), val = fp32(-0x1p+0)]; tensor var_3380 = mul(x = x2_71, y = const_38_promoted)[name = string("op_3380")]; bool var_3382_interleave_0 = const()[name = string("op_3382_interleave_0"), val = bool(false)]; tensor var_3382 = concat(axis = var_88, interleave = var_3382_interleave_0, values = (var_3380, x1_71))[name = string("op_3382")]; tensor var_3383 = mul(x = var_3382, y = sin_7)[name = string("op_3383")]; tensor k_state_35 = add(x = var_3369, y = var_3383)[name = string("k_state_35")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor concat_328_values0_0 = const()[name = string("concat_328_values0_0"), val = tensor([17])]; int32 concat_328_axis_0 = const()[name = string("concat_328_axis_0"), val = int32(0)]; bool concat_328_interleave_0 = const()[name = string("concat_328_interleave_0"), val = bool(false)]; tensor concat_328 = concat(axis = concat_328_axis_0, interleave = concat_328_interleave_0, values = (concat_328_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_328")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18 = slice_update(begin = concat_328, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35, x = key_cache_internal_tensor_assign_17_1)[name = string("key_cache_internal_tensor_assign_18")]; string cast_251_dtype_0 = const()[name = string("cast_251_dtype_0"), val = string("fp16")]; tensor cast_251 = cast(dtype = cast_251_dtype_0, x = key_cache_internal_tensor_assign_18)[name = string("cast_76")]; write_state(data = cast_251, input = key_cache)[name = string("coreml_update_state_106_write_state")]; tensor coreml_update_state_106 = read_state(input = key_cache)[name = string("coreml_update_state_106")]; string key_cache_internal_tensor_assign_18_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_18_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_35 = transpose(perm = v_state_35_perm_0, x = var_3349)[name = string("transpose_73")]; tensor value_cache_internal_tensor_assign_18 = slice_update(begin = concat_328, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35, x = value_cache_internal_tensor_assign_17_1)[name = string("value_cache_internal_tensor_assign_18")]; string cast_252_dtype_0 = const()[name = string("cast_252_dtype_0"), val = string("fp16")]; tensor cast_252 = cast(dtype = cast_252_dtype_0, x = value_cache_internal_tensor_assign_18)[name = string("cast_75")]; write_state(data = cast_252, input = value_cache)[name = string("coreml_update_state_107_write_state")]; tensor coreml_update_state_107 = read_state(input = value_cache)[name = string("coreml_update_state_107")]; string value_cache_internal_tensor_assign_18_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_18_dtype_0"), val = string("fp32")]; tensor var_3406_begin_0 = const()[name = string("op_3406_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3406_end_0 = const()[name = string("op_3406_end_0"), val = tensor([18, 1, 2, 2048, 128])]; tensor var_3406_end_mask_0 = const()[name = string("op_3406_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3406_squeeze_mask_0 = const()[name = string("op_3406_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_1 = cast(dtype = key_cache_internal_tensor_assign_18_dtype_0, x = coreml_update_state_106)[name = string("cast_74")]; tensor var_3406 = slice_by_index(begin = var_3406_begin_0, end = var_3406_end_0, end_mask = var_3406_end_mask_0, squeeze_mask = var_3406_squeeze_mask_0, x = key_cache_internal_tensor_assign_18_1)[name = string("op_3406")]; tensor var_3409_begin_0 = const()[name = string("op_3409_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3409_end_mask_0 = const()[name = string("op_3409_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3409 = slice_by_index(begin = var_3409_begin_0, end = concat_11, end_mask = var_3409_end_mask_0, x = var_3406)[name = string("op_3409")]; tensor var_3411_begin_0 = const()[name = string("op_3411_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3411_end_0 = const()[name = string("op_3411_end_0"), val = tensor([18, 1, 2, 2048, 128])]; tensor var_3411_end_mask_0 = const()[name = string("op_3411_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3411_squeeze_mask_0 = const()[name = string("op_3411_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_18_1 = cast(dtype = value_cache_internal_tensor_assign_18_dtype_0, x = coreml_update_state_107)[name = string("cast_73")]; tensor var_3411 = slice_by_index(begin = var_3411_begin_0, end = var_3411_end_0, end_mask = var_3411_end_mask_0, squeeze_mask = var_3411_squeeze_mask_0, x = value_cache_internal_tensor_assign_18_1)[name = string("op_3411")]; tensor var_3414_begin_0 = const()[name = string("op_3414_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3414_end_mask_0 = const()[name = string("op_3414_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3414 = slice_by_index(begin = var_3414_begin_0, end = concat_11, end_mask = var_3414_end_mask_0, x = var_3411)[name = string("op_3414")]; tensor var_3416_shape = shape(x = var_3409)[name = string("op_3416_shape")]; int32 gather_319 = const()[name = string("gather_319"), val = int32(1)]; int32 gather_320 = const()[name = string("gather_320"), val = int32(2)]; int32 select_321 = const()[name = string("select_321"), val = int32(2)]; int32 gather_321_axis_0 = const()[name = string("gather_321_axis_0"), val = int32(0)]; int32 gather_321_batch_dims_0 = const()[name = string("gather_321_batch_dims_0"), val = int32(0)]; bool gather_321_validate_indices_0 = const()[name = string("gather_321_validate_indices_0"), val = bool(false)]; int32 gather_321 = gather(axis = gather_321_axis_0, batch_dims = gather_321_batch_dims_0, indices = select_321, validate_indices = gather_321_validate_indices_0, x = var_3416_shape)[name = string("gather_321")]; int32 gather_322 = const()[name = string("gather_322"), val = int32(128)]; tensor var_3423_axes_0 = const()[name = string("op_3423_axes_0"), val = tensor([2])]; tensor var_3423 = expand_dims(axes = var_3423_axes_0, x = var_3409)[name = string("op_3423")]; tensor shape_357 = shape(x = var_3423)[name = string("shape_357")]; int32 concat_336_axis_0 = const()[name = string("concat_336_axis_0"), val = int32(0)]; bool concat_336_interleave_0 = const()[name = string("concat_336_interleave_0"), val = bool(false)]; tensor concat_336 = concat(axis = concat_336_axis_0, interleave = concat_336_interleave_0, values = (gather_319, gather_320, var_100, gather_321, gather_322))[name = string("concat_336")]; tensor real_div_34 = real_div(x = concat_336, y = shape_357)[name = string("real_div_34")]; tensor hidden_states_521 = tile(reps = real_div_34, x = var_3423)[name = string("hidden_states_521")]; tensor concat_337x = const()[name = string("concat_337x"), val = tensor([1, 16, -1, 128])]; tensor key_states_71 = reshape(shape = concat_337x, x = hidden_states_521)[name = string("key_states_71")]; tensor var_3433_shape = shape(x = var_3414)[name = string("op_3433_shape")]; int32 gather_323 = const()[name = string("gather_323"), val = int32(1)]; int32 gather_324 = const()[name = string("gather_324"), val = int32(2)]; int32 select_325 = const()[name = string("select_325"), val = int32(2)]; int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; int32 gather_325 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = select_325, validate_indices = gather_325_validate_indices_0, x = var_3433_shape)[name = string("gather_325")]; int32 gather_326 = const()[name = string("gather_326"), val = int32(128)]; tensor var_3440_axes_0 = const()[name = string("op_3440_axes_0"), val = tensor([2])]; tensor var_3440 = expand_dims(axes = var_3440_axes_0, x = var_3414)[name = string("op_3440")]; tensor shape_362 = shape(x = var_3440)[name = string("shape_362")]; int32 concat_338_axis_0 = const()[name = string("concat_338_axis_0"), val = int32(0)]; bool concat_338_interleave_0 = const()[name = string("concat_338_interleave_0"), val = bool(false)]; tensor concat_338 = concat(axis = concat_338_axis_0, interleave = concat_338_interleave_0, values = (gather_323, gather_324, var_100, gather_325, gather_326))[name = string("concat_338")]; tensor real_div_35 = real_div(x = concat_338, y = shape_362)[name = string("real_div_35")]; tensor hidden_states_525 = tile(reps = real_div_35, x = var_3440)[name = string("hidden_states_525")]; tensor concat_339x = const()[name = string("concat_339x"), val = tensor([1, 16, -1, 128])]; tensor value_states_71 = reshape(shape = concat_339x, x = hidden_states_525)[name = string("value_states_71")]; tensor var_3450_shape = shape(x = key_states_71)[name = string("op_3450_shape")]; int32 select_327 = const()[name = string("select_327"), val = int32(2)]; int32 gather_327_axis_0 = const()[name = string("gather_327_axis_0"), val = int32(0)]; int32 gather_327_batch_dims_0 = const()[name = string("gather_327_batch_dims_0"), val = int32(0)]; bool gather_327_validate_indices_0 = const()[name = string("gather_327_validate_indices_0"), val = bool(false)]; int32 gather_327 = gather(axis = gather_327_axis_0, batch_dims = gather_327_batch_dims_0, indices = select_327, validate_indices = gather_327_validate_indices_0, x = var_3450_shape)[name = string("gather_327")]; int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; int32 concat_340_values1_0 = const()[name = string("concat_340_values1_0"), val = int32(1)]; int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(0)]; int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, concat_340_values1_0, concat_340_values2_0, gather_327))[name = string("concat_340")]; tensor causal_mask_37_begin_0 = const()[name = string("causal_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_37_end_mask_0 = const()[name = string("causal_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_37 = slice_by_index(begin = causal_mask_37_begin_0, end = concat_340, end_mask = causal_mask_37_end_mask_0, x = cast_0)[name = string("causal_mask_37")]; tensor attn_output_69 = scaled_dot_product_attention(attn_mask = causal_mask_37, key = key_states_71, query = query_states_71, value = value_states_71)[name = string("attn_output_69")]; tensor var_3456_perm_0 = const()[name = string("op_3456_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_341x = const()[name = string("concat_341x"), val = tensor([1, -1, 2048])]; tensor var_3456 = transpose(perm = var_3456_perm_0, x = attn_output_69)[name = string("transpose_72")]; tensor input_137 = reshape(shape = concat_341x, x = var_3456)[name = string("input_137")]; tensor linear_122 = linear(bias = linear_3_bias_0, weight = model_model_layers_17_self_attn_o_proj_weight_quantized, x = input_137)[name = string("linear_122")]; tensor hidden_states_529 = add(x = hidden_states_509, y = linear_122)[name = string("hidden_states_529")]; fp32 var_94_promoted_35 = const()[name = string("op_94_promoted_35"), val = fp32(0x1p+1)]; tensor var_3465 = pow(x = hidden_states_529, y = var_94_promoted_35)[name = string("op_3465")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_3465)[name = string("variance_71")]; fp32 var_3468 = const()[name = string("op_3468"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3469 = add(x = variance_71, y = var_3468)[name = string("op_3469")]; fp32 var_3470_epsilon_0 = const()[name = string("op_3470_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3470 = rsqrt(epsilon = var_3470_epsilon_0, x = var_3469)[name = string("op_3470")]; tensor hidden_states_533 = mul(x = hidden_states_529, y = var_3470)[name = string("hidden_states_533")]; tensor input_139 = mul(x = model_model_layers_17_post_attention_layernorm_weight, y = hidden_states_533)[name = string("input_139")]; tensor linear_123 = linear(bias = linear_4_bias_0, weight = model_model_layers_17_mlp_gate_proj_weight_quantized, x = input_139)[name = string("linear_123")]; tensor var_3482 = silu(x = linear_123)[name = string("op_3482")]; tensor linear_124 = linear(bias = linear_4_bias_0, weight = model_model_layers_17_mlp_up_proj_weight_quantized, x = input_139)[name = string("linear_124")]; tensor input_143 = mul(x = var_3482, y = linear_124)[name = string("input_143")]; tensor linear_125 = linear(bias = linear_3_bias_0, weight = model_model_layers_17_mlp_down_proj_weight_quantized, x = input_143)[name = string("linear_125")]; tensor hidden_states_539 = add(x = hidden_states_529, y = linear_125)[name = string("hidden_states_539")]; fp32 var_94_promoted_36 = const()[name = string("op_94_promoted_36"), val = fp32(0x1p+1)]; tensor var_3495 = pow(x = hidden_states_539, y = var_94_promoted_36)[name = string("op_3495")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_3495)[name = string("variance_73")]; fp32 var_3498 = const()[name = string("op_3498"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3499 = add(x = variance_73, y = var_3498)[name = string("op_3499")]; fp32 var_3500_epsilon_0 = const()[name = string("op_3500_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3500 = rsqrt(epsilon = var_3500_epsilon_0, x = var_3499)[name = string("op_3500")]; tensor hidden_states_543 = mul(x = hidden_states_539, y = var_3500)[name = string("hidden_states_543")]; tensor hidden_states_547 = mul(x = model_model_layers_18_input_layernorm_weight, y = hidden_states_543)[name = string("hidden_states_547")]; tensor linear_126 = linear(bias = model_model_layers_18_self_attn_q_proj_bias, weight = model_model_layers_18_self_attn_q_proj_weight_quantized, x = hidden_states_547)[name = string("linear_126")]; tensor linear_127 = linear(bias = model_model_layers_18_self_attn_k_proj_bias, weight = model_model_layers_18_self_attn_k_proj_weight_quantized, x = hidden_states_547)[name = string("linear_127")]; tensor linear_128 = linear(bias = model_model_layers_18_self_attn_v_proj_bias, weight = model_model_layers_18_self_attn_v_proj_weight_quantized, x = hidden_states_547)[name = string("linear_128")]; tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 16, 128])]; tensor var_3523 = reshape(shape = concat_342x, x = linear_126)[name = string("op_3523")]; tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 2, 128])]; tensor var_3526 = reshape(shape = concat_343x, x = linear_127)[name = string("op_3526")]; tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 2, 128])]; tensor var_3529 = reshape(shape = concat_344x, x = linear_128)[name = string("op_3529")]; tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_37 = transpose(perm = q_37_perm_0, x = var_3523)[name = string("transpose_71")]; tensor var_3533 = mul(x = q_37, y = cos_7)[name = string("op_3533")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37)[name = string("x1_73")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37)[name = string("x2_73")]; fp32 const_39_promoted = const()[name = string("const_39_promoted"), val = fp32(-0x1p+0)]; tensor var_3544 = mul(x = x2_73, y = const_39_promoted)[name = string("op_3544")]; bool var_3546_interleave_0 = const()[name = string("op_3546_interleave_0"), val = bool(false)]; tensor var_3546 = concat(axis = var_88, interleave = var_3546_interleave_0, values = (var_3544, x1_73))[name = string("op_3546")]; tensor var_3547 = mul(x = var_3546, y = sin_7)[name = string("op_3547")]; tensor query_states_75 = add(x = var_3533, y = var_3547)[name = string("query_states_75")]; tensor k_37 = transpose(perm = k_37_perm_0, x = var_3526)[name = string("transpose_70")]; tensor var_3549 = mul(x = k_37, y = cos_7)[name = string("op_3549")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37)[name = string("x1_75")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37)[name = string("x2_75")]; fp32 const_40_promoted = const()[name = string("const_40_promoted"), val = fp32(-0x1p+0)]; tensor var_3560 = mul(x = x2_75, y = const_40_promoted)[name = string("op_3560")]; bool var_3562_interleave_0 = const()[name = string("op_3562_interleave_0"), val = bool(false)]; tensor var_3562 = concat(axis = var_88, interleave = var_3562_interleave_0, values = (var_3560, x1_75))[name = string("op_3562")]; tensor var_3563 = mul(x = var_3562, y = sin_7)[name = string("op_3563")]; tensor k_state_37 = add(x = var_3549, y = var_3563)[name = string("k_state_37")]; tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; tensor concat_347_values0_0 = const()[name = string("concat_347_values0_0"), val = tensor([18])]; int32 concat_347_axis_0 = const()[name = string("concat_347_axis_0"), val = int32(0)]; bool concat_347_interleave_0 = const()[name = string("concat_347_interleave_0"), val = bool(false)]; tensor concat_347 = concat(axis = concat_347_axis_0, interleave = concat_347_interleave_0, values = (concat_347_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_347")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19 = slice_update(begin = concat_347, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37, x = key_cache_internal_tensor_assign_18_1)[name = string("key_cache_internal_tensor_assign_19")]; string cast_265_dtype_0 = const()[name = string("cast_265_dtype_0"), val = string("fp16")]; tensor cast_265 = cast(dtype = cast_265_dtype_0, x = key_cache_internal_tensor_assign_19)[name = string("cast_72")]; write_state(data = cast_265, input = key_cache)[name = string("coreml_update_state_108_write_state")]; tensor coreml_update_state_108 = read_state(input = key_cache)[name = string("coreml_update_state_108")]; string key_cache_internal_tensor_assign_19_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_19_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_37 = transpose(perm = v_state_37_perm_0, x = var_3529)[name = string("transpose_69")]; tensor value_cache_internal_tensor_assign_19 = slice_update(begin = concat_347, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37, x = value_cache_internal_tensor_assign_18_1)[name = string("value_cache_internal_tensor_assign_19")]; string cast_266_dtype_0 = const()[name = string("cast_266_dtype_0"), val = string("fp16")]; tensor cast_266 = cast(dtype = cast_266_dtype_0, x = value_cache_internal_tensor_assign_19)[name = string("cast_71")]; write_state(data = cast_266, input = value_cache)[name = string("coreml_update_state_109_write_state")]; tensor coreml_update_state_109 = read_state(input = value_cache)[name = string("coreml_update_state_109")]; string value_cache_internal_tensor_assign_19_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_19_dtype_0"), val = string("fp32")]; tensor var_3586_begin_0 = const()[name = string("op_3586_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_3586_end_0 = const()[name = string("op_3586_end_0"), val = tensor([19, 1, 2, 2048, 128])]; tensor var_3586_end_mask_0 = const()[name = string("op_3586_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3586_squeeze_mask_0 = const()[name = string("op_3586_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_1 = cast(dtype = key_cache_internal_tensor_assign_19_dtype_0, x = coreml_update_state_108)[name = string("cast_70")]; tensor var_3586 = slice_by_index(begin = var_3586_begin_0, end = var_3586_end_0, end_mask = var_3586_end_mask_0, squeeze_mask = var_3586_squeeze_mask_0, x = key_cache_internal_tensor_assign_19_1)[name = string("op_3586")]; tensor var_3589_begin_0 = const()[name = string("op_3589_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3589_end_mask_0 = const()[name = string("op_3589_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3589 = slice_by_index(begin = var_3589_begin_0, end = concat_11, end_mask = var_3589_end_mask_0, x = var_3586)[name = string("op_3589")]; tensor var_3591_begin_0 = const()[name = string("op_3591_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_3591_end_0 = const()[name = string("op_3591_end_0"), val = tensor([19, 1, 2, 2048, 128])]; tensor var_3591_end_mask_0 = const()[name = string("op_3591_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3591_squeeze_mask_0 = const()[name = string("op_3591_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_19_1 = cast(dtype = value_cache_internal_tensor_assign_19_dtype_0, x = coreml_update_state_109)[name = string("cast_69")]; tensor var_3591 = slice_by_index(begin = var_3591_begin_0, end = var_3591_end_0, end_mask = var_3591_end_mask_0, squeeze_mask = var_3591_squeeze_mask_0, x = value_cache_internal_tensor_assign_19_1)[name = string("op_3591")]; tensor var_3594_begin_0 = const()[name = string("op_3594_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3594_end_mask_0 = const()[name = string("op_3594_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3594 = slice_by_index(begin = var_3594_begin_0, end = concat_11, end_mask = var_3594_end_mask_0, x = var_3591)[name = string("op_3594")]; tensor var_3596_shape = shape(x = var_3589)[name = string("op_3596_shape")]; int32 gather_337 = const()[name = string("gather_337"), val = int32(1)]; int32 gather_338 = const()[name = string("gather_338"), val = int32(2)]; int32 select_339 = const()[name = string("select_339"), val = int32(2)]; int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; int32 gather_339 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = select_339, validate_indices = gather_339_validate_indices_0, x = var_3596_shape)[name = string("gather_339")]; int32 gather_340 = const()[name = string("gather_340"), val = int32(128)]; tensor var_3603_axes_0 = const()[name = string("op_3603_axes_0"), val = tensor([2])]; tensor var_3603 = expand_dims(axes = var_3603_axes_0, x = var_3589)[name = string("op_3603")]; tensor shape_377 = shape(x = var_3603)[name = string("shape_377")]; int32 concat_355_axis_0 = const()[name = string("concat_355_axis_0"), val = int32(0)]; bool concat_355_interleave_0 = const()[name = string("concat_355_interleave_0"), val = bool(false)]; tensor concat_355 = concat(axis = concat_355_axis_0, interleave = concat_355_interleave_0, values = (gather_337, gather_338, var_100, gather_339, gather_340))[name = string("concat_355")]; tensor real_div_36 = real_div(x = concat_355, y = shape_377)[name = string("real_div_36")]; tensor hidden_states_551 = tile(reps = real_div_36, x = var_3603)[name = string("hidden_states_551")]; tensor concat_356x = const()[name = string("concat_356x"), val = tensor([1, 16, -1, 128])]; tensor key_states_75 = reshape(shape = concat_356x, x = hidden_states_551)[name = string("key_states_75")]; tensor var_3613_shape = shape(x = var_3594)[name = string("op_3613_shape")]; int32 gather_341 = const()[name = string("gather_341"), val = int32(1)]; int32 gather_342 = const()[name = string("gather_342"), val = int32(2)]; int32 select_343 = const()[name = string("select_343"), val = int32(2)]; int32 gather_343_axis_0 = const()[name = string("gather_343_axis_0"), val = int32(0)]; int32 gather_343_batch_dims_0 = const()[name = string("gather_343_batch_dims_0"), val = int32(0)]; bool gather_343_validate_indices_0 = const()[name = string("gather_343_validate_indices_0"), val = bool(false)]; int32 gather_343 = gather(axis = gather_343_axis_0, batch_dims = gather_343_batch_dims_0, indices = select_343, validate_indices = gather_343_validate_indices_0, x = var_3613_shape)[name = string("gather_343")]; int32 gather_344 = const()[name = string("gather_344"), val = int32(128)]; tensor var_3620_axes_0 = const()[name = string("op_3620_axes_0"), val = tensor([2])]; tensor var_3620 = expand_dims(axes = var_3620_axes_0, x = var_3594)[name = string("op_3620")]; tensor shape_382 = shape(x = var_3620)[name = string("shape_382")]; int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (gather_341, gather_342, var_100, gather_343, gather_344))[name = string("concat_357")]; tensor real_div_37 = real_div(x = concat_357, y = shape_382)[name = string("real_div_37")]; tensor hidden_states_555 = tile(reps = real_div_37, x = var_3620)[name = string("hidden_states_555")]; tensor concat_358x = const()[name = string("concat_358x"), val = tensor([1, 16, -1, 128])]; tensor value_states_75 = reshape(shape = concat_358x, x = hidden_states_555)[name = string("value_states_75")]; tensor var_3630_shape = shape(x = key_states_75)[name = string("op_3630_shape")]; int32 select_345 = const()[name = string("select_345"), val = int32(2)]; int32 gather_345_axis_0 = const()[name = string("gather_345_axis_0"), val = int32(0)]; int32 gather_345_batch_dims_0 = const()[name = string("gather_345_batch_dims_0"), val = int32(0)]; bool gather_345_validate_indices_0 = const()[name = string("gather_345_validate_indices_0"), val = bool(false)]; int32 gather_345 = gather(axis = gather_345_axis_0, batch_dims = gather_345_batch_dims_0, indices = select_345, validate_indices = gather_345_validate_indices_0, x = var_3630_shape)[name = string("gather_345")]; int32 concat_359_values0_0 = const()[name = string("concat_359_values0_0"), val = int32(1)]; int32 concat_359_values1_0 = const()[name = string("concat_359_values1_0"), val = int32(1)]; int32 concat_359_values2_0 = const()[name = string("concat_359_values2_0"), val = int32(0)]; int32 concat_359_axis_0 = const()[name = string("concat_359_axis_0"), val = int32(0)]; bool concat_359_interleave_0 = const()[name = string("concat_359_interleave_0"), val = bool(false)]; tensor concat_359 = concat(axis = concat_359_axis_0, interleave = concat_359_interleave_0, values = (concat_359_values0_0, concat_359_values1_0, concat_359_values2_0, gather_345))[name = string("concat_359")]; tensor causal_mask_39_begin_0 = const()[name = string("causal_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_39_end_mask_0 = const()[name = string("causal_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_39 = slice_by_index(begin = causal_mask_39_begin_0, end = concat_359, end_mask = causal_mask_39_end_mask_0, x = cast_0)[name = string("causal_mask_39")]; tensor attn_output_73 = scaled_dot_product_attention(attn_mask = causal_mask_39, key = key_states_75, query = query_states_75, value = value_states_75)[name = string("attn_output_73")]; tensor var_3636_perm_0 = const()[name = string("op_3636_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_360x = const()[name = string("concat_360x"), val = tensor([1, -1, 2048])]; tensor var_3636 = transpose(perm = var_3636_perm_0, x = attn_output_73)[name = string("transpose_68")]; tensor input_145 = reshape(shape = concat_360x, x = var_3636)[name = string("input_145")]; tensor linear_129 = linear(bias = linear_3_bias_0, weight = model_model_layers_18_self_attn_o_proj_weight_quantized, x = input_145)[name = string("linear_129")]; tensor hidden_states_559 = add(x = hidden_states_539, y = linear_129)[name = string("hidden_states_559")]; fp32 var_94_promoted_37 = const()[name = string("op_94_promoted_37"), val = fp32(0x1p+1)]; tensor var_3645 = pow(x = hidden_states_559, y = var_94_promoted_37)[name = string("op_3645")]; tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor variance_75 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_3645)[name = string("variance_75")]; fp32 var_3648 = const()[name = string("op_3648"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3649 = add(x = variance_75, y = var_3648)[name = string("op_3649")]; fp32 var_3650_epsilon_0 = const()[name = string("op_3650_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3650 = rsqrt(epsilon = var_3650_epsilon_0, x = var_3649)[name = string("op_3650")]; tensor hidden_states_563 = mul(x = hidden_states_559, y = var_3650)[name = string("hidden_states_563")]; tensor input_147 = mul(x = model_model_layers_18_post_attention_layernorm_weight, y = hidden_states_563)[name = string("input_147")]; tensor linear_130 = linear(bias = linear_4_bias_0, weight = model_model_layers_18_mlp_gate_proj_weight_quantized, x = input_147)[name = string("linear_130")]; tensor var_3662 = silu(x = linear_130)[name = string("op_3662")]; tensor linear_131 = linear(bias = linear_4_bias_0, weight = model_model_layers_18_mlp_up_proj_weight_quantized, x = input_147)[name = string("linear_131")]; tensor input_151 = mul(x = var_3662, y = linear_131)[name = string("input_151")]; tensor linear_132 = linear(bias = linear_3_bias_0, weight = model_model_layers_18_mlp_down_proj_weight_quantized, x = input_151)[name = string("linear_132")]; tensor hidden_states_569 = add(x = hidden_states_559, y = linear_132)[name = string("hidden_states_569")]; fp32 var_94_promoted_38 = const()[name = string("op_94_promoted_38"), val = fp32(0x1p+1)]; tensor var_3675 = pow(x = hidden_states_569, y = var_94_promoted_38)[name = string("op_3675")]; tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor variance_77 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_3675)[name = string("variance_77")]; fp32 var_3678 = const()[name = string("op_3678"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3679 = add(x = variance_77, y = var_3678)[name = string("op_3679")]; fp32 var_3680_epsilon_0 = const()[name = string("op_3680_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3680 = rsqrt(epsilon = var_3680_epsilon_0, x = var_3679)[name = string("op_3680")]; tensor hidden_states_573 = mul(x = hidden_states_569, y = var_3680)[name = string("hidden_states_573")]; tensor hidden_states_577 = mul(x = model_model_layers_19_input_layernorm_weight, y = hidden_states_573)[name = string("hidden_states_577")]; tensor linear_133 = linear(bias = model_model_layers_19_self_attn_q_proj_bias, weight = model_model_layers_19_self_attn_q_proj_weight_quantized, x = hidden_states_577)[name = string("linear_133")]; tensor linear_134 = linear(bias = model_model_layers_19_self_attn_k_proj_bias, weight = model_model_layers_19_self_attn_k_proj_weight_quantized, x = hidden_states_577)[name = string("linear_134")]; tensor linear_135 = linear(bias = model_model_layers_19_self_attn_v_proj_bias, weight = model_model_layers_19_self_attn_v_proj_weight_quantized, x = hidden_states_577)[name = string("linear_135")]; tensor concat_361x = const()[name = string("concat_361x"), val = tensor([1, -1, 16, 128])]; tensor var_3703 = reshape(shape = concat_361x, x = linear_133)[name = string("op_3703")]; tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_362x = const()[name = string("concat_362x"), val = tensor([1, -1, 2, 128])]; tensor var_3706 = reshape(shape = concat_362x, x = linear_134)[name = string("op_3706")]; tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_363x = const()[name = string("concat_363x"), val = tensor([1, -1, 2, 128])]; tensor var_3709 = reshape(shape = concat_363x, x = linear_135)[name = string("op_3709")]; tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_39 = transpose(perm = q_39_perm_0, x = var_3703)[name = string("transpose_67")]; tensor var_3713 = mul(x = q_39, y = cos_7)[name = string("op_3713")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39)[name = string("x1_77")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39)[name = string("x2_77")]; fp32 const_41_promoted = const()[name = string("const_41_promoted"), val = fp32(-0x1p+0)]; tensor var_3724 = mul(x = x2_77, y = const_41_promoted)[name = string("op_3724")]; bool var_3726_interleave_0 = const()[name = string("op_3726_interleave_0"), val = bool(false)]; tensor var_3726 = concat(axis = var_88, interleave = var_3726_interleave_0, values = (var_3724, x1_77))[name = string("op_3726")]; tensor var_3727 = mul(x = var_3726, y = sin_7)[name = string("op_3727")]; tensor query_states_79 = add(x = var_3713, y = var_3727)[name = string("query_states_79")]; tensor k_39 = transpose(perm = k_39_perm_0, x = var_3706)[name = string("transpose_66")]; tensor var_3729 = mul(x = k_39, y = cos_7)[name = string("op_3729")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39)[name = string("x1_79")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39)[name = string("x2_79")]; fp32 const_42_promoted = const()[name = string("const_42_promoted"), val = fp32(-0x1p+0)]; tensor var_3740 = mul(x = x2_79, y = const_42_promoted)[name = string("op_3740")]; bool var_3742_interleave_0 = const()[name = string("op_3742_interleave_0"), val = bool(false)]; tensor var_3742 = concat(axis = var_88, interleave = var_3742_interleave_0, values = (var_3740, x1_79))[name = string("op_3742")]; tensor var_3743 = mul(x = var_3742, y = sin_7)[name = string("op_3743")]; tensor k_state_39 = add(x = var_3729, y = var_3743)[name = string("k_state_39")]; tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; tensor concat_366_values0_0 = const()[name = string("concat_366_values0_0"), val = tensor([19])]; int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (concat_366_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_366")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20 = slice_update(begin = concat_366, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39, x = key_cache_internal_tensor_assign_19_1)[name = string("key_cache_internal_tensor_assign_20")]; string cast_279_dtype_0 = const()[name = string("cast_279_dtype_0"), val = string("fp16")]; tensor cast_279 = cast(dtype = cast_279_dtype_0, x = key_cache_internal_tensor_assign_20)[name = string("cast_68")]; write_state(data = cast_279, input = key_cache)[name = string("coreml_update_state_110_write_state")]; tensor coreml_update_state_110 = read_state(input = key_cache)[name = string("coreml_update_state_110")]; string key_cache_internal_tensor_assign_20_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_20_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_39 = transpose(perm = v_state_39_perm_0, x = var_3709)[name = string("transpose_65")]; tensor value_cache_internal_tensor_assign_20 = slice_update(begin = concat_366, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39, x = value_cache_internal_tensor_assign_19_1)[name = string("value_cache_internal_tensor_assign_20")]; string cast_280_dtype_0 = const()[name = string("cast_280_dtype_0"), val = string("fp16")]; tensor cast_280 = cast(dtype = cast_280_dtype_0, x = value_cache_internal_tensor_assign_20)[name = string("cast_67")]; write_state(data = cast_280, input = value_cache)[name = string("coreml_update_state_111_write_state")]; tensor coreml_update_state_111 = read_state(input = value_cache)[name = string("coreml_update_state_111")]; string value_cache_internal_tensor_assign_20_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_20_dtype_0"), val = string("fp32")]; tensor var_3766_begin_0 = const()[name = string("op_3766_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_3766_end_0 = const()[name = string("op_3766_end_0"), val = tensor([20, 1, 2, 2048, 128])]; tensor var_3766_end_mask_0 = const()[name = string("op_3766_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3766_squeeze_mask_0 = const()[name = string("op_3766_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_1 = cast(dtype = key_cache_internal_tensor_assign_20_dtype_0, x = coreml_update_state_110)[name = string("cast_66")]; tensor var_3766 = slice_by_index(begin = var_3766_begin_0, end = var_3766_end_0, end_mask = var_3766_end_mask_0, squeeze_mask = var_3766_squeeze_mask_0, x = key_cache_internal_tensor_assign_20_1)[name = string("op_3766")]; tensor var_3769_begin_0 = const()[name = string("op_3769_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3769_end_mask_0 = const()[name = string("op_3769_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3769 = slice_by_index(begin = var_3769_begin_0, end = concat_11, end_mask = var_3769_end_mask_0, x = var_3766)[name = string("op_3769")]; tensor var_3771_begin_0 = const()[name = string("op_3771_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_3771_end_0 = const()[name = string("op_3771_end_0"), val = tensor([20, 1, 2, 2048, 128])]; tensor var_3771_end_mask_0 = const()[name = string("op_3771_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3771_squeeze_mask_0 = const()[name = string("op_3771_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_20_1 = cast(dtype = value_cache_internal_tensor_assign_20_dtype_0, x = coreml_update_state_111)[name = string("cast_65")]; tensor var_3771 = slice_by_index(begin = var_3771_begin_0, end = var_3771_end_0, end_mask = var_3771_end_mask_0, squeeze_mask = var_3771_squeeze_mask_0, x = value_cache_internal_tensor_assign_20_1)[name = string("op_3771")]; tensor var_3774_begin_0 = const()[name = string("op_3774_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3774_end_mask_0 = const()[name = string("op_3774_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3774 = slice_by_index(begin = var_3774_begin_0, end = concat_11, end_mask = var_3774_end_mask_0, x = var_3771)[name = string("op_3774")]; tensor var_3776_shape = shape(x = var_3769)[name = string("op_3776_shape")]; int32 gather_355 = const()[name = string("gather_355"), val = int32(1)]; int32 gather_356 = const()[name = string("gather_356"), val = int32(2)]; int32 select_357 = const()[name = string("select_357"), val = int32(2)]; int32 gather_357_axis_0 = const()[name = string("gather_357_axis_0"), val = int32(0)]; int32 gather_357_batch_dims_0 = const()[name = string("gather_357_batch_dims_0"), val = int32(0)]; bool gather_357_validate_indices_0 = const()[name = string("gather_357_validate_indices_0"), val = bool(false)]; int32 gather_357 = gather(axis = gather_357_axis_0, batch_dims = gather_357_batch_dims_0, indices = select_357, validate_indices = gather_357_validate_indices_0, x = var_3776_shape)[name = string("gather_357")]; int32 gather_358 = const()[name = string("gather_358"), val = int32(128)]; tensor var_3783_axes_0 = const()[name = string("op_3783_axes_0"), val = tensor([2])]; tensor var_3783 = expand_dims(axes = var_3783_axes_0, x = var_3769)[name = string("op_3783")]; tensor shape_397 = shape(x = var_3783)[name = string("shape_397")]; int32 concat_374_axis_0 = const()[name = string("concat_374_axis_0"), val = int32(0)]; bool concat_374_interleave_0 = const()[name = string("concat_374_interleave_0"), val = bool(false)]; tensor concat_374 = concat(axis = concat_374_axis_0, interleave = concat_374_interleave_0, values = (gather_355, gather_356, var_100, gather_357, gather_358))[name = string("concat_374")]; tensor real_div_38 = real_div(x = concat_374, y = shape_397)[name = string("real_div_38")]; tensor hidden_states_581 = tile(reps = real_div_38, x = var_3783)[name = string("hidden_states_581")]; tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, 16, -1, 128])]; tensor key_states_79 = reshape(shape = concat_375x, x = hidden_states_581)[name = string("key_states_79")]; tensor var_3793_shape = shape(x = var_3774)[name = string("op_3793_shape")]; int32 gather_359 = const()[name = string("gather_359"), val = int32(1)]; int32 gather_360 = const()[name = string("gather_360"), val = int32(2)]; int32 select_361 = const()[name = string("select_361"), val = int32(2)]; int32 gather_361_axis_0 = const()[name = string("gather_361_axis_0"), val = int32(0)]; int32 gather_361_batch_dims_0 = const()[name = string("gather_361_batch_dims_0"), val = int32(0)]; bool gather_361_validate_indices_0 = const()[name = string("gather_361_validate_indices_0"), val = bool(false)]; int32 gather_361 = gather(axis = gather_361_axis_0, batch_dims = gather_361_batch_dims_0, indices = select_361, validate_indices = gather_361_validate_indices_0, x = var_3793_shape)[name = string("gather_361")]; int32 gather_362 = const()[name = string("gather_362"), val = int32(128)]; tensor var_3800_axes_0 = const()[name = string("op_3800_axes_0"), val = tensor([2])]; tensor var_3800 = expand_dims(axes = var_3800_axes_0, x = var_3774)[name = string("op_3800")]; tensor shape_402 = shape(x = var_3800)[name = string("shape_402")]; int32 concat_376_axis_0 = const()[name = string("concat_376_axis_0"), val = int32(0)]; bool concat_376_interleave_0 = const()[name = string("concat_376_interleave_0"), val = bool(false)]; tensor concat_376 = concat(axis = concat_376_axis_0, interleave = concat_376_interleave_0, values = (gather_359, gather_360, var_100, gather_361, gather_362))[name = string("concat_376")]; tensor real_div_39 = real_div(x = concat_376, y = shape_402)[name = string("real_div_39")]; tensor hidden_states_585 = tile(reps = real_div_39, x = var_3800)[name = string("hidden_states_585")]; tensor concat_377x = const()[name = string("concat_377x"), val = tensor([1, 16, -1, 128])]; tensor value_states_79 = reshape(shape = concat_377x, x = hidden_states_585)[name = string("value_states_79")]; tensor var_3810_shape = shape(x = key_states_79)[name = string("op_3810_shape")]; int32 select_363 = const()[name = string("select_363"), val = int32(2)]; int32 gather_363_axis_0 = const()[name = string("gather_363_axis_0"), val = int32(0)]; int32 gather_363_batch_dims_0 = const()[name = string("gather_363_batch_dims_0"), val = int32(0)]; bool gather_363_validate_indices_0 = const()[name = string("gather_363_validate_indices_0"), val = bool(false)]; int32 gather_363 = gather(axis = gather_363_axis_0, batch_dims = gather_363_batch_dims_0, indices = select_363, validate_indices = gather_363_validate_indices_0, x = var_3810_shape)[name = string("gather_363")]; int32 concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = int32(1)]; int32 concat_378_values1_0 = const()[name = string("concat_378_values1_0"), val = int32(1)]; int32 concat_378_values2_0 = const()[name = string("concat_378_values2_0"), val = int32(0)]; int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, concat_378_values1_0, concat_378_values2_0, gather_363))[name = string("concat_378")]; tensor causal_mask_41_begin_0 = const()[name = string("causal_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_41_end_mask_0 = const()[name = string("causal_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_41 = slice_by_index(begin = causal_mask_41_begin_0, end = concat_378, end_mask = causal_mask_41_end_mask_0, x = cast_0)[name = string("causal_mask_41")]; tensor attn_output_77 = scaled_dot_product_attention(attn_mask = causal_mask_41, key = key_states_79, query = query_states_79, value = value_states_79)[name = string("attn_output_77")]; tensor var_3816_perm_0 = const()[name = string("op_3816_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_379x = const()[name = string("concat_379x"), val = tensor([1, -1, 2048])]; tensor var_3816 = transpose(perm = var_3816_perm_0, x = attn_output_77)[name = string("transpose_64")]; tensor input_153 = reshape(shape = concat_379x, x = var_3816)[name = string("input_153")]; tensor linear_136 = linear(bias = linear_3_bias_0, weight = model_model_layers_19_self_attn_o_proj_weight_quantized, x = input_153)[name = string("linear_136")]; tensor hidden_states_589 = add(x = hidden_states_569, y = linear_136)[name = string("hidden_states_589")]; fp32 var_94_promoted_39 = const()[name = string("op_94_promoted_39"), val = fp32(0x1p+1)]; tensor var_3825 = pow(x = hidden_states_589, y = var_94_promoted_39)[name = string("op_3825")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_3825)[name = string("variance_79")]; fp32 var_3828 = const()[name = string("op_3828"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3829 = add(x = variance_79, y = var_3828)[name = string("op_3829")]; fp32 var_3830_epsilon_0 = const()[name = string("op_3830_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3830 = rsqrt(epsilon = var_3830_epsilon_0, x = var_3829)[name = string("op_3830")]; tensor hidden_states_593 = mul(x = hidden_states_589, y = var_3830)[name = string("hidden_states_593")]; tensor input_155 = mul(x = model_model_layers_19_post_attention_layernorm_weight, y = hidden_states_593)[name = string("input_155")]; tensor linear_137 = linear(bias = linear_4_bias_0, weight = model_model_layers_19_mlp_gate_proj_weight_quantized, x = input_155)[name = string("linear_137")]; tensor var_3842 = silu(x = linear_137)[name = string("op_3842")]; tensor linear_138 = linear(bias = linear_4_bias_0, weight = model_model_layers_19_mlp_up_proj_weight_quantized, x = input_155)[name = string("linear_138")]; tensor input_159 = mul(x = var_3842, y = linear_138)[name = string("input_159")]; tensor linear_139 = linear(bias = linear_3_bias_0, weight = model_model_layers_19_mlp_down_proj_weight_quantized, x = input_159)[name = string("linear_139")]; tensor hidden_states_599 = add(x = hidden_states_589, y = linear_139)[name = string("hidden_states_599")]; fp32 var_94_promoted_40 = const()[name = string("op_94_promoted_40"), val = fp32(0x1p+1)]; tensor var_3855 = pow(x = hidden_states_599, y = var_94_promoted_40)[name = string("op_3855")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_3855)[name = string("variance_81")]; fp32 var_3858 = const()[name = string("op_3858"), val = fp32(0x1.0c6f7ap-20)]; tensor var_3859 = add(x = variance_81, y = var_3858)[name = string("op_3859")]; fp32 var_3860_epsilon_0 = const()[name = string("op_3860_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3860 = rsqrt(epsilon = var_3860_epsilon_0, x = var_3859)[name = string("op_3860")]; tensor hidden_states_603 = mul(x = hidden_states_599, y = var_3860)[name = string("hidden_states_603")]; tensor hidden_states_607 = mul(x = model_model_layers_20_input_layernorm_weight, y = hidden_states_603)[name = string("hidden_states_607")]; tensor linear_140 = linear(bias = model_model_layers_20_self_attn_q_proj_bias, weight = model_model_layers_20_self_attn_q_proj_weight_quantized, x = hidden_states_607)[name = string("linear_140")]; tensor linear_141 = linear(bias = model_model_layers_20_self_attn_k_proj_bias, weight = model_model_layers_20_self_attn_k_proj_weight_quantized, x = hidden_states_607)[name = string("linear_141")]; tensor linear_142 = linear(bias = model_model_layers_20_self_attn_v_proj_bias, weight = model_model_layers_20_self_attn_v_proj_weight_quantized, x = hidden_states_607)[name = string("linear_142")]; tensor concat_380x = const()[name = string("concat_380x"), val = tensor([1, -1, 16, 128])]; tensor var_3883 = reshape(shape = concat_380x, x = linear_140)[name = string("op_3883")]; tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_381x = const()[name = string("concat_381x"), val = tensor([1, -1, 2, 128])]; tensor var_3886 = reshape(shape = concat_381x, x = linear_141)[name = string("op_3886")]; tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_382x = const()[name = string("concat_382x"), val = tensor([1, -1, 2, 128])]; tensor var_3889 = reshape(shape = concat_382x, x = linear_142)[name = string("op_3889")]; tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_41 = transpose(perm = q_41_perm_0, x = var_3883)[name = string("transpose_63")]; tensor var_3893 = mul(x = q_41, y = cos_7)[name = string("op_3893")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41)[name = string("x1_81")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41)[name = string("x2_81")]; fp32 const_43_promoted = const()[name = string("const_43_promoted"), val = fp32(-0x1p+0)]; tensor var_3904 = mul(x = x2_81, y = const_43_promoted)[name = string("op_3904")]; bool var_3906_interleave_0 = const()[name = string("op_3906_interleave_0"), val = bool(false)]; tensor var_3906 = concat(axis = var_88, interleave = var_3906_interleave_0, values = (var_3904, x1_81))[name = string("op_3906")]; tensor var_3907 = mul(x = var_3906, y = sin_7)[name = string("op_3907")]; tensor query_states_83 = add(x = var_3893, y = var_3907)[name = string("query_states_83")]; tensor k_41 = transpose(perm = k_41_perm_0, x = var_3886)[name = string("transpose_62")]; tensor var_3909 = mul(x = k_41, y = cos_7)[name = string("op_3909")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41)[name = string("x1_83")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41)[name = string("x2_83")]; fp32 const_44_promoted = const()[name = string("const_44_promoted"), val = fp32(-0x1p+0)]; tensor var_3920 = mul(x = x2_83, y = const_44_promoted)[name = string("op_3920")]; bool var_3922_interleave_0 = const()[name = string("op_3922_interleave_0"), val = bool(false)]; tensor var_3922 = concat(axis = var_88, interleave = var_3922_interleave_0, values = (var_3920, x1_83))[name = string("op_3922")]; tensor var_3923 = mul(x = var_3922, y = sin_7)[name = string("op_3923")]; tensor k_state_41 = add(x = var_3909, y = var_3923)[name = string("k_state_41")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; tensor concat_385_values0_0 = const()[name = string("concat_385_values0_0"), val = tensor([20])]; int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (concat_385_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_385")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21 = slice_update(begin = concat_385, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41, x = key_cache_internal_tensor_assign_20_1)[name = string("key_cache_internal_tensor_assign_21")]; string cast_293_dtype_0 = const()[name = string("cast_293_dtype_0"), val = string("fp16")]; tensor cast_293 = cast(dtype = cast_293_dtype_0, x = key_cache_internal_tensor_assign_21)[name = string("cast_64")]; write_state(data = cast_293, input = key_cache)[name = string("coreml_update_state_112_write_state")]; tensor coreml_update_state_112 = read_state(input = key_cache)[name = string("coreml_update_state_112")]; string key_cache_internal_tensor_assign_21_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_21_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_41 = transpose(perm = v_state_41_perm_0, x = var_3889)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_21 = slice_update(begin = concat_385, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41, x = value_cache_internal_tensor_assign_20_1)[name = string("value_cache_internal_tensor_assign_21")]; string cast_294_dtype_0 = const()[name = string("cast_294_dtype_0"), val = string("fp16")]; tensor cast_294 = cast(dtype = cast_294_dtype_0, x = value_cache_internal_tensor_assign_21)[name = string("cast_63")]; write_state(data = cast_294, input = value_cache)[name = string("coreml_update_state_113_write_state")]; tensor coreml_update_state_113 = read_state(input = value_cache)[name = string("coreml_update_state_113")]; string value_cache_internal_tensor_assign_21_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_21_dtype_0"), val = string("fp32")]; tensor var_3946_begin_0 = const()[name = string("op_3946_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_3946_end_0 = const()[name = string("op_3946_end_0"), val = tensor([21, 1, 2, 2048, 128])]; tensor var_3946_end_mask_0 = const()[name = string("op_3946_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3946_squeeze_mask_0 = const()[name = string("op_3946_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_1 = cast(dtype = key_cache_internal_tensor_assign_21_dtype_0, x = coreml_update_state_112)[name = string("cast_62")]; tensor var_3946 = slice_by_index(begin = var_3946_begin_0, end = var_3946_end_0, end_mask = var_3946_end_mask_0, squeeze_mask = var_3946_squeeze_mask_0, x = key_cache_internal_tensor_assign_21_1)[name = string("op_3946")]; tensor var_3949_begin_0 = const()[name = string("op_3949_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3949_end_mask_0 = const()[name = string("op_3949_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3949 = slice_by_index(begin = var_3949_begin_0, end = concat_11, end_mask = var_3949_end_mask_0, x = var_3946)[name = string("op_3949")]; tensor var_3951_begin_0 = const()[name = string("op_3951_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_3951_end_0 = const()[name = string("op_3951_end_0"), val = tensor([21, 1, 2, 2048, 128])]; tensor var_3951_end_mask_0 = const()[name = string("op_3951_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3951_squeeze_mask_0 = const()[name = string("op_3951_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_21_1 = cast(dtype = value_cache_internal_tensor_assign_21_dtype_0, x = coreml_update_state_113)[name = string("cast_61")]; tensor var_3951 = slice_by_index(begin = var_3951_begin_0, end = var_3951_end_0, end_mask = var_3951_end_mask_0, squeeze_mask = var_3951_squeeze_mask_0, x = value_cache_internal_tensor_assign_21_1)[name = string("op_3951")]; tensor var_3954_begin_0 = const()[name = string("op_3954_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3954_end_mask_0 = const()[name = string("op_3954_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3954 = slice_by_index(begin = var_3954_begin_0, end = concat_11, end_mask = var_3954_end_mask_0, x = var_3951)[name = string("op_3954")]; tensor var_3956_shape = shape(x = var_3949)[name = string("op_3956_shape")]; int32 gather_373 = const()[name = string("gather_373"), val = int32(1)]; int32 gather_374 = const()[name = string("gather_374"), val = int32(2)]; int32 select_375 = const()[name = string("select_375"), val = int32(2)]; int32 gather_375_axis_0 = const()[name = string("gather_375_axis_0"), val = int32(0)]; int32 gather_375_batch_dims_0 = const()[name = string("gather_375_batch_dims_0"), val = int32(0)]; bool gather_375_validate_indices_0 = const()[name = string("gather_375_validate_indices_0"), val = bool(false)]; int32 gather_375 = gather(axis = gather_375_axis_0, batch_dims = gather_375_batch_dims_0, indices = select_375, validate_indices = gather_375_validate_indices_0, x = var_3956_shape)[name = string("gather_375")]; int32 gather_376 = const()[name = string("gather_376"), val = int32(128)]; tensor var_3963_axes_0 = const()[name = string("op_3963_axes_0"), val = tensor([2])]; tensor var_3963 = expand_dims(axes = var_3963_axes_0, x = var_3949)[name = string("op_3963")]; tensor shape_417 = shape(x = var_3963)[name = string("shape_417")]; int32 concat_393_axis_0 = const()[name = string("concat_393_axis_0"), val = int32(0)]; bool concat_393_interleave_0 = const()[name = string("concat_393_interleave_0"), val = bool(false)]; tensor concat_393 = concat(axis = concat_393_axis_0, interleave = concat_393_interleave_0, values = (gather_373, gather_374, var_100, gather_375, gather_376))[name = string("concat_393")]; tensor real_div_40 = real_div(x = concat_393, y = shape_417)[name = string("real_div_40")]; tensor hidden_states_611 = tile(reps = real_div_40, x = var_3963)[name = string("hidden_states_611")]; tensor concat_394x = const()[name = string("concat_394x"), val = tensor([1, 16, -1, 128])]; tensor key_states_83 = reshape(shape = concat_394x, x = hidden_states_611)[name = string("key_states_83")]; tensor var_3973_shape = shape(x = var_3954)[name = string("op_3973_shape")]; int32 gather_377 = const()[name = string("gather_377"), val = int32(1)]; int32 gather_378 = const()[name = string("gather_378"), val = int32(2)]; int32 select_379 = const()[name = string("select_379"), val = int32(2)]; int32 gather_379_axis_0 = const()[name = string("gather_379_axis_0"), val = int32(0)]; int32 gather_379_batch_dims_0 = const()[name = string("gather_379_batch_dims_0"), val = int32(0)]; bool gather_379_validate_indices_0 = const()[name = string("gather_379_validate_indices_0"), val = bool(false)]; int32 gather_379 = gather(axis = gather_379_axis_0, batch_dims = gather_379_batch_dims_0, indices = select_379, validate_indices = gather_379_validate_indices_0, x = var_3973_shape)[name = string("gather_379")]; int32 gather_380 = const()[name = string("gather_380"), val = int32(128)]; tensor var_3980_axes_0 = const()[name = string("op_3980_axes_0"), val = tensor([2])]; tensor var_3980 = expand_dims(axes = var_3980_axes_0, x = var_3954)[name = string("op_3980")]; tensor shape_422 = shape(x = var_3980)[name = string("shape_422")]; int32 concat_395_axis_0 = const()[name = string("concat_395_axis_0"), val = int32(0)]; bool concat_395_interleave_0 = const()[name = string("concat_395_interleave_0"), val = bool(false)]; tensor concat_395 = concat(axis = concat_395_axis_0, interleave = concat_395_interleave_0, values = (gather_377, gather_378, var_100, gather_379, gather_380))[name = string("concat_395")]; tensor real_div_41 = real_div(x = concat_395, y = shape_422)[name = string("real_div_41")]; tensor hidden_states_615 = tile(reps = real_div_41, x = var_3980)[name = string("hidden_states_615")]; tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, 16, -1, 128])]; tensor value_states_83 = reshape(shape = concat_396x, x = hidden_states_615)[name = string("value_states_83")]; tensor var_3990_shape = shape(x = key_states_83)[name = string("op_3990_shape")]; int32 select_381 = const()[name = string("select_381"), val = int32(2)]; int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; int32 gather_381 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = select_381, validate_indices = gather_381_validate_indices_0, x = var_3990_shape)[name = string("gather_381")]; int32 concat_397_values0_0 = const()[name = string("concat_397_values0_0"), val = int32(1)]; int32 concat_397_values1_0 = const()[name = string("concat_397_values1_0"), val = int32(1)]; int32 concat_397_values2_0 = const()[name = string("concat_397_values2_0"), val = int32(0)]; int32 concat_397_axis_0 = const()[name = string("concat_397_axis_0"), val = int32(0)]; bool concat_397_interleave_0 = const()[name = string("concat_397_interleave_0"), val = bool(false)]; tensor concat_397 = concat(axis = concat_397_axis_0, interleave = concat_397_interleave_0, values = (concat_397_values0_0, concat_397_values1_0, concat_397_values2_0, gather_381))[name = string("concat_397")]; tensor causal_mask_43_begin_0 = const()[name = string("causal_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_43_end_mask_0 = const()[name = string("causal_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_43 = slice_by_index(begin = causal_mask_43_begin_0, end = concat_397, end_mask = causal_mask_43_end_mask_0, x = cast_0)[name = string("causal_mask_43")]; tensor attn_output_81 = scaled_dot_product_attention(attn_mask = causal_mask_43, key = key_states_83, query = query_states_83, value = value_states_83)[name = string("attn_output_81")]; tensor var_3996_perm_0 = const()[name = string("op_3996_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_398x = const()[name = string("concat_398x"), val = tensor([1, -1, 2048])]; tensor var_3996 = transpose(perm = var_3996_perm_0, x = attn_output_81)[name = string("transpose_60")]; tensor input_161 = reshape(shape = concat_398x, x = var_3996)[name = string("input_161")]; tensor linear_143 = linear(bias = linear_3_bias_0, weight = model_model_layers_20_self_attn_o_proj_weight_quantized, x = input_161)[name = string("linear_143")]; tensor hidden_states_619 = add(x = hidden_states_599, y = linear_143)[name = string("hidden_states_619")]; fp32 var_94_promoted_41 = const()[name = string("op_94_promoted_41"), val = fp32(0x1p+1)]; tensor var_4005 = pow(x = hidden_states_619, y = var_94_promoted_41)[name = string("op_4005")]; tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor variance_83 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_4005)[name = string("variance_83")]; fp32 var_4008 = const()[name = string("op_4008"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4009 = add(x = variance_83, y = var_4008)[name = string("op_4009")]; fp32 var_4010_epsilon_0 = const()[name = string("op_4010_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4010 = rsqrt(epsilon = var_4010_epsilon_0, x = var_4009)[name = string("op_4010")]; tensor hidden_states_623 = mul(x = hidden_states_619, y = var_4010)[name = string("hidden_states_623")]; tensor input_163 = mul(x = model_model_layers_20_post_attention_layernorm_weight, y = hidden_states_623)[name = string("input_163")]; tensor linear_144 = linear(bias = linear_4_bias_0, weight = model_model_layers_20_mlp_gate_proj_weight_quantized, x = input_163)[name = string("linear_144")]; tensor var_4022 = silu(x = linear_144)[name = string("op_4022")]; tensor linear_145 = linear(bias = linear_4_bias_0, weight = model_model_layers_20_mlp_up_proj_weight_quantized, x = input_163)[name = string("linear_145")]; tensor input_167 = mul(x = var_4022, y = linear_145)[name = string("input_167")]; tensor linear_146 = linear(bias = linear_3_bias_0, weight = model_model_layers_20_mlp_down_proj_weight_quantized, x = input_167)[name = string("linear_146")]; tensor hidden_states_629 = add(x = hidden_states_619, y = linear_146)[name = string("hidden_states_629")]; fp32 var_94_promoted_42 = const()[name = string("op_94_promoted_42"), val = fp32(0x1p+1)]; tensor var_4035 = pow(x = hidden_states_629, y = var_94_promoted_42)[name = string("op_4035")]; tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor variance_85 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_4035)[name = string("variance_85")]; fp32 var_4038 = const()[name = string("op_4038"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4039 = add(x = variance_85, y = var_4038)[name = string("op_4039")]; fp32 var_4040_epsilon_0 = const()[name = string("op_4040_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4040 = rsqrt(epsilon = var_4040_epsilon_0, x = var_4039)[name = string("op_4040")]; tensor hidden_states_633 = mul(x = hidden_states_629, y = var_4040)[name = string("hidden_states_633")]; tensor hidden_states_637 = mul(x = model_model_layers_21_input_layernorm_weight, y = hidden_states_633)[name = string("hidden_states_637")]; tensor linear_147 = linear(bias = model_model_layers_21_self_attn_q_proj_bias, weight = model_model_layers_21_self_attn_q_proj_weight_quantized, x = hidden_states_637)[name = string("linear_147")]; tensor linear_148 = linear(bias = model_model_layers_21_self_attn_k_proj_bias, weight = model_model_layers_21_self_attn_k_proj_weight_quantized, x = hidden_states_637)[name = string("linear_148")]; tensor linear_149 = linear(bias = model_model_layers_21_self_attn_v_proj_bias, weight = model_model_layers_21_self_attn_v_proj_weight_quantized, x = hidden_states_637)[name = string("linear_149")]; tensor concat_399x = const()[name = string("concat_399x"), val = tensor([1, -1, 16, 128])]; tensor var_4063 = reshape(shape = concat_399x, x = linear_147)[name = string("op_4063")]; tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_400x = const()[name = string("concat_400x"), val = tensor([1, -1, 2, 128])]; tensor var_4066 = reshape(shape = concat_400x, x = linear_148)[name = string("op_4066")]; tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_401x = const()[name = string("concat_401x"), val = tensor([1, -1, 2, 128])]; tensor var_4069 = reshape(shape = concat_401x, x = linear_149)[name = string("op_4069")]; tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_43 = transpose(perm = q_43_perm_0, x = var_4063)[name = string("transpose_59")]; tensor var_4073 = mul(x = q_43, y = cos_7)[name = string("op_4073")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43)[name = string("x1_85")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43)[name = string("x2_85")]; fp32 const_45_promoted = const()[name = string("const_45_promoted"), val = fp32(-0x1p+0)]; tensor var_4084 = mul(x = x2_85, y = const_45_promoted)[name = string("op_4084")]; bool var_4086_interleave_0 = const()[name = string("op_4086_interleave_0"), val = bool(false)]; tensor var_4086 = concat(axis = var_88, interleave = var_4086_interleave_0, values = (var_4084, x1_85))[name = string("op_4086")]; tensor var_4087 = mul(x = var_4086, y = sin_7)[name = string("op_4087")]; tensor query_states_87 = add(x = var_4073, y = var_4087)[name = string("query_states_87")]; tensor k_43 = transpose(perm = k_43_perm_0, x = var_4066)[name = string("transpose_58")]; tensor var_4089 = mul(x = k_43, y = cos_7)[name = string("op_4089")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43)[name = string("x1_87")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43)[name = string("x2_87")]; fp32 const_46_promoted = const()[name = string("const_46_promoted"), val = fp32(-0x1p+0)]; tensor var_4100 = mul(x = x2_87, y = const_46_promoted)[name = string("op_4100")]; bool var_4102_interleave_0 = const()[name = string("op_4102_interleave_0"), val = bool(false)]; tensor var_4102 = concat(axis = var_88, interleave = var_4102_interleave_0, values = (var_4100, x1_87))[name = string("op_4102")]; tensor var_4103 = mul(x = var_4102, y = sin_7)[name = string("op_4103")]; tensor k_state_43 = add(x = var_4089, y = var_4103)[name = string("k_state_43")]; tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; tensor concat_404_values0_0 = const()[name = string("concat_404_values0_0"), val = tensor([21])]; int32 concat_404_axis_0 = const()[name = string("concat_404_axis_0"), val = int32(0)]; bool concat_404_interleave_0 = const()[name = string("concat_404_interleave_0"), val = bool(false)]; tensor concat_404 = concat(axis = concat_404_axis_0, interleave = concat_404_interleave_0, values = (concat_404_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_404")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22 = slice_update(begin = concat_404, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43, x = key_cache_internal_tensor_assign_21_1)[name = string("key_cache_internal_tensor_assign_22")]; string cast_307_dtype_0 = const()[name = string("cast_307_dtype_0"), val = string("fp16")]; tensor cast_307 = cast(dtype = cast_307_dtype_0, x = key_cache_internal_tensor_assign_22)[name = string("cast_60")]; write_state(data = cast_307, input = key_cache)[name = string("coreml_update_state_114_write_state")]; tensor coreml_update_state_114 = read_state(input = key_cache)[name = string("coreml_update_state_114")]; string key_cache_internal_tensor_assign_22_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_22_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_43 = transpose(perm = v_state_43_perm_0, x = var_4069)[name = string("transpose_57")]; tensor value_cache_internal_tensor_assign_22 = slice_update(begin = concat_404, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43, x = value_cache_internal_tensor_assign_21_1)[name = string("value_cache_internal_tensor_assign_22")]; string cast_308_dtype_0 = const()[name = string("cast_308_dtype_0"), val = string("fp16")]; tensor cast_308 = cast(dtype = cast_308_dtype_0, x = value_cache_internal_tensor_assign_22)[name = string("cast_59")]; write_state(data = cast_308, input = value_cache)[name = string("coreml_update_state_115_write_state")]; tensor coreml_update_state_115 = read_state(input = value_cache)[name = string("coreml_update_state_115")]; string value_cache_internal_tensor_assign_22_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_22_dtype_0"), val = string("fp32")]; tensor var_4126_begin_0 = const()[name = string("op_4126_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4126_end_0 = const()[name = string("op_4126_end_0"), val = tensor([22, 1, 2, 2048, 128])]; tensor var_4126_end_mask_0 = const()[name = string("op_4126_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4126_squeeze_mask_0 = const()[name = string("op_4126_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_1 = cast(dtype = key_cache_internal_tensor_assign_22_dtype_0, x = coreml_update_state_114)[name = string("cast_58")]; tensor var_4126 = slice_by_index(begin = var_4126_begin_0, end = var_4126_end_0, end_mask = var_4126_end_mask_0, squeeze_mask = var_4126_squeeze_mask_0, x = key_cache_internal_tensor_assign_22_1)[name = string("op_4126")]; tensor var_4129_begin_0 = const()[name = string("op_4129_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4129_end_mask_0 = const()[name = string("op_4129_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4129 = slice_by_index(begin = var_4129_begin_0, end = concat_11, end_mask = var_4129_end_mask_0, x = var_4126)[name = string("op_4129")]; tensor var_4131_begin_0 = const()[name = string("op_4131_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4131_end_0 = const()[name = string("op_4131_end_0"), val = tensor([22, 1, 2, 2048, 128])]; tensor var_4131_end_mask_0 = const()[name = string("op_4131_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4131_squeeze_mask_0 = const()[name = string("op_4131_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_22_1 = cast(dtype = value_cache_internal_tensor_assign_22_dtype_0, x = coreml_update_state_115)[name = string("cast_57")]; tensor var_4131 = slice_by_index(begin = var_4131_begin_0, end = var_4131_end_0, end_mask = var_4131_end_mask_0, squeeze_mask = var_4131_squeeze_mask_0, x = value_cache_internal_tensor_assign_22_1)[name = string("op_4131")]; tensor var_4134_begin_0 = const()[name = string("op_4134_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4134_end_mask_0 = const()[name = string("op_4134_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4134 = slice_by_index(begin = var_4134_begin_0, end = concat_11, end_mask = var_4134_end_mask_0, x = var_4131)[name = string("op_4134")]; tensor var_4136_shape = shape(x = var_4129)[name = string("op_4136_shape")]; int32 gather_391 = const()[name = string("gather_391"), val = int32(1)]; int32 gather_392 = const()[name = string("gather_392"), val = int32(2)]; int32 select_393 = const()[name = string("select_393"), val = int32(2)]; int32 gather_393_axis_0 = const()[name = string("gather_393_axis_0"), val = int32(0)]; int32 gather_393_batch_dims_0 = const()[name = string("gather_393_batch_dims_0"), val = int32(0)]; bool gather_393_validate_indices_0 = const()[name = string("gather_393_validate_indices_0"), val = bool(false)]; int32 gather_393 = gather(axis = gather_393_axis_0, batch_dims = gather_393_batch_dims_0, indices = select_393, validate_indices = gather_393_validate_indices_0, x = var_4136_shape)[name = string("gather_393")]; int32 gather_394 = const()[name = string("gather_394"), val = int32(128)]; tensor var_4143_axes_0 = const()[name = string("op_4143_axes_0"), val = tensor([2])]; tensor var_4143 = expand_dims(axes = var_4143_axes_0, x = var_4129)[name = string("op_4143")]; tensor shape_437 = shape(x = var_4143)[name = string("shape_437")]; int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (gather_391, gather_392, var_100, gather_393, gather_394))[name = string("concat_412")]; tensor real_div_42 = real_div(x = concat_412, y = shape_437)[name = string("real_div_42")]; tensor hidden_states_641 = tile(reps = real_div_42, x = var_4143)[name = string("hidden_states_641")]; tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, 16, -1, 128])]; tensor key_states_87 = reshape(shape = concat_413x, x = hidden_states_641)[name = string("key_states_87")]; tensor var_4153_shape = shape(x = var_4134)[name = string("op_4153_shape")]; int32 gather_395 = const()[name = string("gather_395"), val = int32(1)]; int32 gather_396 = const()[name = string("gather_396"), val = int32(2)]; int32 select_397 = const()[name = string("select_397"), val = int32(2)]; int32 gather_397_axis_0 = const()[name = string("gather_397_axis_0"), val = int32(0)]; int32 gather_397_batch_dims_0 = const()[name = string("gather_397_batch_dims_0"), val = int32(0)]; bool gather_397_validate_indices_0 = const()[name = string("gather_397_validate_indices_0"), val = bool(false)]; int32 gather_397 = gather(axis = gather_397_axis_0, batch_dims = gather_397_batch_dims_0, indices = select_397, validate_indices = gather_397_validate_indices_0, x = var_4153_shape)[name = string("gather_397")]; int32 gather_398 = const()[name = string("gather_398"), val = int32(128)]; tensor var_4160_axes_0 = const()[name = string("op_4160_axes_0"), val = tensor([2])]; tensor var_4160 = expand_dims(axes = var_4160_axes_0, x = var_4134)[name = string("op_4160")]; tensor shape_442 = shape(x = var_4160)[name = string("shape_442")]; int32 concat_414_axis_0 = const()[name = string("concat_414_axis_0"), val = int32(0)]; bool concat_414_interleave_0 = const()[name = string("concat_414_interleave_0"), val = bool(false)]; tensor concat_414 = concat(axis = concat_414_axis_0, interleave = concat_414_interleave_0, values = (gather_395, gather_396, var_100, gather_397, gather_398))[name = string("concat_414")]; tensor real_div_43 = real_div(x = concat_414, y = shape_442)[name = string("real_div_43")]; tensor hidden_states_645 = tile(reps = real_div_43, x = var_4160)[name = string("hidden_states_645")]; tensor concat_415x = const()[name = string("concat_415x"), val = tensor([1, 16, -1, 128])]; tensor value_states_87 = reshape(shape = concat_415x, x = hidden_states_645)[name = string("value_states_87")]; tensor var_4170_shape = shape(x = key_states_87)[name = string("op_4170_shape")]; int32 select_399 = const()[name = string("select_399"), val = int32(2)]; int32 gather_399_axis_0 = const()[name = string("gather_399_axis_0"), val = int32(0)]; int32 gather_399_batch_dims_0 = const()[name = string("gather_399_batch_dims_0"), val = int32(0)]; bool gather_399_validate_indices_0 = const()[name = string("gather_399_validate_indices_0"), val = bool(false)]; int32 gather_399 = gather(axis = gather_399_axis_0, batch_dims = gather_399_batch_dims_0, indices = select_399, validate_indices = gather_399_validate_indices_0, x = var_4170_shape)[name = string("gather_399")]; int32 concat_416_values0_0 = const()[name = string("concat_416_values0_0"), val = int32(1)]; int32 concat_416_values1_0 = const()[name = string("concat_416_values1_0"), val = int32(1)]; int32 concat_416_values2_0 = const()[name = string("concat_416_values2_0"), val = int32(0)]; int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (concat_416_values0_0, concat_416_values1_0, concat_416_values2_0, gather_399))[name = string("concat_416")]; tensor causal_mask_45_begin_0 = const()[name = string("causal_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_45_end_mask_0 = const()[name = string("causal_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_45 = slice_by_index(begin = causal_mask_45_begin_0, end = concat_416, end_mask = causal_mask_45_end_mask_0, x = cast_0)[name = string("causal_mask_45")]; tensor attn_output_85 = scaled_dot_product_attention(attn_mask = causal_mask_45, key = key_states_87, query = query_states_87, value = value_states_87)[name = string("attn_output_85")]; tensor var_4176_perm_0 = const()[name = string("op_4176_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_417x = const()[name = string("concat_417x"), val = tensor([1, -1, 2048])]; tensor var_4176 = transpose(perm = var_4176_perm_0, x = attn_output_85)[name = string("transpose_56")]; tensor input_169 = reshape(shape = concat_417x, x = var_4176)[name = string("input_169")]; tensor linear_150 = linear(bias = linear_3_bias_0, weight = model_model_layers_21_self_attn_o_proj_weight_quantized, x = input_169)[name = string("linear_150")]; tensor hidden_states_649 = add(x = hidden_states_629, y = linear_150)[name = string("hidden_states_649")]; fp32 var_94_promoted_43 = const()[name = string("op_94_promoted_43"), val = fp32(0x1p+1)]; tensor var_4185 = pow(x = hidden_states_649, y = var_94_promoted_43)[name = string("op_4185")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_4185)[name = string("variance_87")]; fp32 var_4188 = const()[name = string("op_4188"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4189 = add(x = variance_87, y = var_4188)[name = string("op_4189")]; fp32 var_4190_epsilon_0 = const()[name = string("op_4190_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4190 = rsqrt(epsilon = var_4190_epsilon_0, x = var_4189)[name = string("op_4190")]; tensor hidden_states_653 = mul(x = hidden_states_649, y = var_4190)[name = string("hidden_states_653")]; tensor input_171 = mul(x = model_model_layers_21_post_attention_layernorm_weight, y = hidden_states_653)[name = string("input_171")]; tensor linear_151 = linear(bias = linear_4_bias_0, weight = model_model_layers_21_mlp_gate_proj_weight_quantized, x = input_171)[name = string("linear_151")]; tensor var_4202 = silu(x = linear_151)[name = string("op_4202")]; tensor linear_152 = linear(bias = linear_4_bias_0, weight = model_model_layers_21_mlp_up_proj_weight_quantized, x = input_171)[name = string("linear_152")]; tensor input_175 = mul(x = var_4202, y = linear_152)[name = string("input_175")]; tensor linear_153 = linear(bias = linear_3_bias_0, weight = model_model_layers_21_mlp_down_proj_weight_quantized, x = input_175)[name = string("linear_153")]; tensor hidden_states_659 = add(x = hidden_states_649, y = linear_153)[name = string("hidden_states_659")]; fp32 var_94_promoted_44 = const()[name = string("op_94_promoted_44"), val = fp32(0x1p+1)]; tensor var_4215 = pow(x = hidden_states_659, y = var_94_promoted_44)[name = string("op_4215")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_4215)[name = string("variance_89")]; fp32 var_4218 = const()[name = string("op_4218"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4219 = add(x = variance_89, y = var_4218)[name = string("op_4219")]; fp32 var_4220_epsilon_0 = const()[name = string("op_4220_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4220 = rsqrt(epsilon = var_4220_epsilon_0, x = var_4219)[name = string("op_4220")]; tensor hidden_states_663 = mul(x = hidden_states_659, y = var_4220)[name = string("hidden_states_663")]; tensor hidden_states_667 = mul(x = model_model_layers_22_input_layernorm_weight, y = hidden_states_663)[name = string("hidden_states_667")]; tensor linear_154 = linear(bias = model_model_layers_22_self_attn_q_proj_bias, weight = model_model_layers_22_self_attn_q_proj_weight_quantized, x = hidden_states_667)[name = string("linear_154")]; tensor linear_155 = linear(bias = model_model_layers_22_self_attn_k_proj_bias, weight = model_model_layers_22_self_attn_k_proj_weight_quantized, x = hidden_states_667)[name = string("linear_155")]; tensor linear_156 = linear(bias = model_model_layers_22_self_attn_v_proj_bias, weight = model_model_layers_22_self_attn_v_proj_weight_quantized, x = hidden_states_667)[name = string("linear_156")]; tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 16, 128])]; tensor var_4243 = reshape(shape = concat_418x, x = linear_154)[name = string("op_4243")]; tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 2, 128])]; tensor var_4246 = reshape(shape = concat_419x, x = linear_155)[name = string("op_4246")]; tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_420x = const()[name = string("concat_420x"), val = tensor([1, -1, 2, 128])]; tensor var_4249 = reshape(shape = concat_420x, x = linear_156)[name = string("op_4249")]; tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_45 = transpose(perm = q_45_perm_0, x = var_4243)[name = string("transpose_55")]; tensor var_4253 = mul(x = q_45, y = cos_7)[name = string("op_4253")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45)[name = string("x1_89")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45)[name = string("x2_89")]; fp32 const_47_promoted = const()[name = string("const_47_promoted"), val = fp32(-0x1p+0)]; tensor var_4264 = mul(x = x2_89, y = const_47_promoted)[name = string("op_4264")]; bool var_4266_interleave_0 = const()[name = string("op_4266_interleave_0"), val = bool(false)]; tensor var_4266 = concat(axis = var_88, interleave = var_4266_interleave_0, values = (var_4264, x1_89))[name = string("op_4266")]; tensor var_4267 = mul(x = var_4266, y = sin_7)[name = string("op_4267")]; tensor query_states_91 = add(x = var_4253, y = var_4267)[name = string("query_states_91")]; tensor k_45 = transpose(perm = k_45_perm_0, x = var_4246)[name = string("transpose_54")]; tensor var_4269 = mul(x = k_45, y = cos_7)[name = string("op_4269")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45)[name = string("x1_91")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45)[name = string("x2_91")]; fp32 const_48_promoted = const()[name = string("const_48_promoted"), val = fp32(-0x1p+0)]; tensor var_4280 = mul(x = x2_91, y = const_48_promoted)[name = string("op_4280")]; bool var_4282_interleave_0 = const()[name = string("op_4282_interleave_0"), val = bool(false)]; tensor var_4282 = concat(axis = var_88, interleave = var_4282_interleave_0, values = (var_4280, x1_91))[name = string("op_4282")]; tensor var_4283 = mul(x = var_4282, y = sin_7)[name = string("op_4283")]; tensor k_state_45 = add(x = var_4269, y = var_4283)[name = string("k_state_45")]; tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([22])]; int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_423")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23 = slice_update(begin = concat_423, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45, x = key_cache_internal_tensor_assign_22_1)[name = string("key_cache_internal_tensor_assign_23")]; string cast_321_dtype_0 = const()[name = string("cast_321_dtype_0"), val = string("fp16")]; tensor cast_321 = cast(dtype = cast_321_dtype_0, x = key_cache_internal_tensor_assign_23)[name = string("cast_56")]; write_state(data = cast_321, input = key_cache)[name = string("coreml_update_state_116_write_state")]; tensor coreml_update_state_116 = read_state(input = key_cache)[name = string("coreml_update_state_116")]; string key_cache_internal_tensor_assign_23_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_23_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_45 = transpose(perm = v_state_45_perm_0, x = var_4249)[name = string("transpose_53")]; tensor value_cache_internal_tensor_assign_23 = slice_update(begin = concat_423, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45, x = value_cache_internal_tensor_assign_22_1)[name = string("value_cache_internal_tensor_assign_23")]; string cast_322_dtype_0 = const()[name = string("cast_322_dtype_0"), val = string("fp16")]; tensor cast_322 = cast(dtype = cast_322_dtype_0, x = value_cache_internal_tensor_assign_23)[name = string("cast_55")]; write_state(data = cast_322, input = value_cache)[name = string("coreml_update_state_117_write_state")]; tensor coreml_update_state_117 = read_state(input = value_cache)[name = string("coreml_update_state_117")]; string value_cache_internal_tensor_assign_23_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_23_dtype_0"), val = string("fp32")]; tensor var_4306_begin_0 = const()[name = string("op_4306_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4306_end_0 = const()[name = string("op_4306_end_0"), val = tensor([23, 1, 2, 2048, 128])]; tensor var_4306_end_mask_0 = const()[name = string("op_4306_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4306_squeeze_mask_0 = const()[name = string("op_4306_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_1 = cast(dtype = key_cache_internal_tensor_assign_23_dtype_0, x = coreml_update_state_116)[name = string("cast_54")]; tensor var_4306 = slice_by_index(begin = var_4306_begin_0, end = var_4306_end_0, end_mask = var_4306_end_mask_0, squeeze_mask = var_4306_squeeze_mask_0, x = key_cache_internal_tensor_assign_23_1)[name = string("op_4306")]; tensor var_4309_begin_0 = const()[name = string("op_4309_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4309_end_mask_0 = const()[name = string("op_4309_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4309 = slice_by_index(begin = var_4309_begin_0, end = concat_11, end_mask = var_4309_end_mask_0, x = var_4306)[name = string("op_4309")]; tensor var_4311_begin_0 = const()[name = string("op_4311_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4311_end_0 = const()[name = string("op_4311_end_0"), val = tensor([23, 1, 2, 2048, 128])]; tensor var_4311_end_mask_0 = const()[name = string("op_4311_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4311_squeeze_mask_0 = const()[name = string("op_4311_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_23_1 = cast(dtype = value_cache_internal_tensor_assign_23_dtype_0, x = coreml_update_state_117)[name = string("cast_53")]; tensor var_4311 = slice_by_index(begin = var_4311_begin_0, end = var_4311_end_0, end_mask = var_4311_end_mask_0, squeeze_mask = var_4311_squeeze_mask_0, x = value_cache_internal_tensor_assign_23_1)[name = string("op_4311")]; tensor var_4314_begin_0 = const()[name = string("op_4314_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4314_end_mask_0 = const()[name = string("op_4314_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4314 = slice_by_index(begin = var_4314_begin_0, end = concat_11, end_mask = var_4314_end_mask_0, x = var_4311)[name = string("op_4314")]; tensor var_4316_shape = shape(x = var_4309)[name = string("op_4316_shape")]; int32 gather_409 = const()[name = string("gather_409"), val = int32(1)]; int32 gather_410 = const()[name = string("gather_410"), val = int32(2)]; int32 select_411 = const()[name = string("select_411"), val = int32(2)]; int32 gather_411_axis_0 = const()[name = string("gather_411_axis_0"), val = int32(0)]; int32 gather_411_batch_dims_0 = const()[name = string("gather_411_batch_dims_0"), val = int32(0)]; bool gather_411_validate_indices_0 = const()[name = string("gather_411_validate_indices_0"), val = bool(false)]; int32 gather_411 = gather(axis = gather_411_axis_0, batch_dims = gather_411_batch_dims_0, indices = select_411, validate_indices = gather_411_validate_indices_0, x = var_4316_shape)[name = string("gather_411")]; int32 gather_412 = const()[name = string("gather_412"), val = int32(128)]; tensor var_4323_axes_0 = const()[name = string("op_4323_axes_0"), val = tensor([2])]; tensor var_4323 = expand_dims(axes = var_4323_axes_0, x = var_4309)[name = string("op_4323")]; tensor shape_457 = shape(x = var_4323)[name = string("shape_457")]; int32 concat_431_axis_0 = const()[name = string("concat_431_axis_0"), val = int32(0)]; bool concat_431_interleave_0 = const()[name = string("concat_431_interleave_0"), val = bool(false)]; tensor concat_431 = concat(axis = concat_431_axis_0, interleave = concat_431_interleave_0, values = (gather_409, gather_410, var_100, gather_411, gather_412))[name = string("concat_431")]; tensor real_div_44 = real_div(x = concat_431, y = shape_457)[name = string("real_div_44")]; tensor hidden_states_671 = tile(reps = real_div_44, x = var_4323)[name = string("hidden_states_671")]; tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, 16, -1, 128])]; tensor key_states_91 = reshape(shape = concat_432x, x = hidden_states_671)[name = string("key_states_91")]; tensor var_4333_shape = shape(x = var_4314)[name = string("op_4333_shape")]; int32 gather_413 = const()[name = string("gather_413"), val = int32(1)]; int32 gather_414 = const()[name = string("gather_414"), val = int32(2)]; int32 select_415 = const()[name = string("select_415"), val = int32(2)]; int32 gather_415_axis_0 = const()[name = string("gather_415_axis_0"), val = int32(0)]; int32 gather_415_batch_dims_0 = const()[name = string("gather_415_batch_dims_0"), val = int32(0)]; bool gather_415_validate_indices_0 = const()[name = string("gather_415_validate_indices_0"), val = bool(false)]; int32 gather_415 = gather(axis = gather_415_axis_0, batch_dims = gather_415_batch_dims_0, indices = select_415, validate_indices = gather_415_validate_indices_0, x = var_4333_shape)[name = string("gather_415")]; int32 gather_416 = const()[name = string("gather_416"), val = int32(128)]; tensor var_4340_axes_0 = const()[name = string("op_4340_axes_0"), val = tensor([2])]; tensor var_4340 = expand_dims(axes = var_4340_axes_0, x = var_4314)[name = string("op_4340")]; tensor shape_462 = shape(x = var_4340)[name = string("shape_462")]; int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_413, gather_414, var_100, gather_415, gather_416))[name = string("concat_433")]; tensor real_div_45 = real_div(x = concat_433, y = shape_462)[name = string("real_div_45")]; tensor hidden_states_675 = tile(reps = real_div_45, x = var_4340)[name = string("hidden_states_675")]; tensor concat_434x = const()[name = string("concat_434x"), val = tensor([1, 16, -1, 128])]; tensor value_states_91 = reshape(shape = concat_434x, x = hidden_states_675)[name = string("value_states_91")]; tensor var_4350_shape = shape(x = key_states_91)[name = string("op_4350_shape")]; int32 select_417 = const()[name = string("select_417"), val = int32(2)]; int32 gather_417_axis_0 = const()[name = string("gather_417_axis_0"), val = int32(0)]; int32 gather_417_batch_dims_0 = const()[name = string("gather_417_batch_dims_0"), val = int32(0)]; bool gather_417_validate_indices_0 = const()[name = string("gather_417_validate_indices_0"), val = bool(false)]; int32 gather_417 = gather(axis = gather_417_axis_0, batch_dims = gather_417_batch_dims_0, indices = select_417, validate_indices = gather_417_validate_indices_0, x = var_4350_shape)[name = string("gather_417")]; int32 concat_435_values0_0 = const()[name = string("concat_435_values0_0"), val = int32(1)]; int32 concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = int32(1)]; int32 concat_435_values2_0 = const()[name = string("concat_435_values2_0"), val = int32(0)]; int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (concat_435_values0_0, concat_435_values1_0, concat_435_values2_0, gather_417))[name = string("concat_435")]; tensor causal_mask_47_begin_0 = const()[name = string("causal_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_47_end_mask_0 = const()[name = string("causal_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_47 = slice_by_index(begin = causal_mask_47_begin_0, end = concat_435, end_mask = causal_mask_47_end_mask_0, x = cast_0)[name = string("causal_mask_47")]; tensor attn_output_89 = scaled_dot_product_attention(attn_mask = causal_mask_47, key = key_states_91, query = query_states_91, value = value_states_91)[name = string("attn_output_89")]; tensor var_4356_perm_0 = const()[name = string("op_4356_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_436x = const()[name = string("concat_436x"), val = tensor([1, -1, 2048])]; tensor var_4356 = transpose(perm = var_4356_perm_0, x = attn_output_89)[name = string("transpose_52")]; tensor input_177 = reshape(shape = concat_436x, x = var_4356)[name = string("input_177")]; tensor linear_157 = linear(bias = linear_3_bias_0, weight = model_model_layers_22_self_attn_o_proj_weight_quantized, x = input_177)[name = string("linear_157")]; tensor hidden_states_679 = add(x = hidden_states_659, y = linear_157)[name = string("hidden_states_679")]; fp32 var_94_promoted_45 = const()[name = string("op_94_promoted_45"), val = fp32(0x1p+1)]; tensor var_4365 = pow(x = hidden_states_679, y = var_94_promoted_45)[name = string("op_4365")]; tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor variance_91 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_4365)[name = string("variance_91")]; fp32 var_4368 = const()[name = string("op_4368"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4369 = add(x = variance_91, y = var_4368)[name = string("op_4369")]; fp32 var_4370_epsilon_0 = const()[name = string("op_4370_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4370 = rsqrt(epsilon = var_4370_epsilon_0, x = var_4369)[name = string("op_4370")]; tensor hidden_states_683 = mul(x = hidden_states_679, y = var_4370)[name = string("hidden_states_683")]; tensor input_179 = mul(x = model_model_layers_22_post_attention_layernorm_weight, y = hidden_states_683)[name = string("input_179")]; tensor linear_158 = linear(bias = linear_4_bias_0, weight = model_model_layers_22_mlp_gate_proj_weight_quantized, x = input_179)[name = string("linear_158")]; tensor var_4382 = silu(x = linear_158)[name = string("op_4382")]; tensor linear_159 = linear(bias = linear_4_bias_0, weight = model_model_layers_22_mlp_up_proj_weight_quantized, x = input_179)[name = string("linear_159")]; tensor input_183 = mul(x = var_4382, y = linear_159)[name = string("input_183")]; tensor linear_160 = linear(bias = linear_3_bias_0, weight = model_model_layers_22_mlp_down_proj_weight_quantized, x = input_183)[name = string("linear_160")]; tensor hidden_states_689 = add(x = hidden_states_679, y = linear_160)[name = string("hidden_states_689")]; fp32 var_94_promoted_46 = const()[name = string("op_94_promoted_46"), val = fp32(0x1p+1)]; tensor var_4395 = pow(x = hidden_states_689, y = var_94_promoted_46)[name = string("op_4395")]; tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor variance_93 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_4395)[name = string("variance_93")]; fp32 var_4398 = const()[name = string("op_4398"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4399 = add(x = variance_93, y = var_4398)[name = string("op_4399")]; fp32 var_4400_epsilon_0 = const()[name = string("op_4400_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4400 = rsqrt(epsilon = var_4400_epsilon_0, x = var_4399)[name = string("op_4400")]; tensor hidden_states_693 = mul(x = hidden_states_689, y = var_4400)[name = string("hidden_states_693")]; tensor hidden_states_697 = mul(x = model_model_layers_23_input_layernorm_weight, y = hidden_states_693)[name = string("hidden_states_697")]; tensor linear_161 = linear(bias = model_model_layers_23_self_attn_q_proj_bias, weight = model_model_layers_23_self_attn_q_proj_weight_quantized, x = hidden_states_697)[name = string("linear_161")]; tensor linear_162 = linear(bias = model_model_layers_23_self_attn_k_proj_bias, weight = model_model_layers_23_self_attn_k_proj_weight_quantized, x = hidden_states_697)[name = string("linear_162")]; tensor linear_163 = linear(bias = model_model_layers_23_self_attn_v_proj_bias, weight = model_model_layers_23_self_attn_v_proj_weight_quantized, x = hidden_states_697)[name = string("linear_163")]; tensor concat_437x = const()[name = string("concat_437x"), val = tensor([1, -1, 16, 128])]; tensor var_4423 = reshape(shape = concat_437x, x = linear_161)[name = string("op_4423")]; tensor q_47_perm_0 = const()[name = string("q_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_438x = const()[name = string("concat_438x"), val = tensor([1, -1, 2, 128])]; tensor var_4426 = reshape(shape = concat_438x, x = linear_162)[name = string("op_4426")]; tensor k_47_perm_0 = const()[name = string("k_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_439x = const()[name = string("concat_439x"), val = tensor([1, -1, 2, 128])]; tensor var_4429 = reshape(shape = concat_439x, x = linear_163)[name = string("op_4429")]; tensor v_state_47_perm_0 = const()[name = string("v_state_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_47 = transpose(perm = q_47_perm_0, x = var_4423)[name = string("transpose_51")]; tensor var_4433 = mul(x = q_47, y = cos_7)[name = string("op_4433")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47)[name = string("x1_93")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47)[name = string("x2_93")]; fp32 const_49_promoted = const()[name = string("const_49_promoted"), val = fp32(-0x1p+0)]; tensor var_4444 = mul(x = x2_93, y = const_49_promoted)[name = string("op_4444")]; bool var_4446_interleave_0 = const()[name = string("op_4446_interleave_0"), val = bool(false)]; tensor var_4446 = concat(axis = var_88, interleave = var_4446_interleave_0, values = (var_4444, x1_93))[name = string("op_4446")]; tensor var_4447 = mul(x = var_4446, y = sin_7)[name = string("op_4447")]; tensor query_states_95 = add(x = var_4433, y = var_4447)[name = string("query_states_95")]; tensor k_47 = transpose(perm = k_47_perm_0, x = var_4426)[name = string("transpose_50")]; tensor var_4449 = mul(x = k_47, y = cos_7)[name = string("op_4449")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47)[name = string("x1_95")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47)[name = string("x2_95")]; fp32 const_50_promoted = const()[name = string("const_50_promoted"), val = fp32(-0x1p+0)]; tensor var_4460 = mul(x = x2_95, y = const_50_promoted)[name = string("op_4460")]; bool var_4462_interleave_0 = const()[name = string("op_4462_interleave_0"), val = bool(false)]; tensor var_4462 = concat(axis = var_88, interleave = var_4462_interleave_0, values = (var_4460, x1_95))[name = string("op_4462")]; tensor var_4463 = mul(x = var_4462, y = sin_7)[name = string("op_4463")]; tensor k_state_47 = add(x = var_4449, y = var_4463)[name = string("k_state_47")]; tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; tensor concat_442_values0_0 = const()[name = string("concat_442_values0_0"), val = tensor([23])]; int32 concat_442_axis_0 = const()[name = string("concat_442_axis_0"), val = int32(0)]; bool concat_442_interleave_0 = const()[name = string("concat_442_interleave_0"), val = bool(false)]; tensor concat_442 = concat(axis = concat_442_axis_0, interleave = concat_442_interleave_0, values = (concat_442_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_442")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24 = slice_update(begin = concat_442, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_47, x = key_cache_internal_tensor_assign_23_1)[name = string("key_cache_internal_tensor_assign_24")]; string cast_335_dtype_0 = const()[name = string("cast_335_dtype_0"), val = string("fp16")]; tensor cast_335 = cast(dtype = cast_335_dtype_0, x = key_cache_internal_tensor_assign_24)[name = string("cast_52")]; write_state(data = cast_335, input = key_cache)[name = string("coreml_update_state_118_write_state")]; tensor coreml_update_state_118 = read_state(input = key_cache)[name = string("coreml_update_state_118")]; string key_cache_internal_tensor_assign_24_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_24_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_47 = transpose(perm = v_state_47_perm_0, x = var_4429)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_24 = slice_update(begin = concat_442, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_47, x = value_cache_internal_tensor_assign_23_1)[name = string("value_cache_internal_tensor_assign_24")]; string cast_336_dtype_0 = const()[name = string("cast_336_dtype_0"), val = string("fp16")]; tensor cast_336 = cast(dtype = cast_336_dtype_0, x = value_cache_internal_tensor_assign_24)[name = string("cast_51")]; write_state(data = cast_336, input = value_cache)[name = string("coreml_update_state_119_write_state")]; tensor coreml_update_state_119 = read_state(input = value_cache)[name = string("coreml_update_state_119")]; string value_cache_internal_tensor_assign_24_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_24_dtype_0"), val = string("fp32")]; tensor var_4486_begin_0 = const()[name = string("op_4486_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_4486_end_0 = const()[name = string("op_4486_end_0"), val = tensor([24, 1, 2, 2048, 128])]; tensor var_4486_end_mask_0 = const()[name = string("op_4486_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4486_squeeze_mask_0 = const()[name = string("op_4486_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_1 = cast(dtype = key_cache_internal_tensor_assign_24_dtype_0, x = coreml_update_state_118)[name = string("cast_50")]; tensor var_4486 = slice_by_index(begin = var_4486_begin_0, end = var_4486_end_0, end_mask = var_4486_end_mask_0, squeeze_mask = var_4486_squeeze_mask_0, x = key_cache_internal_tensor_assign_24_1)[name = string("op_4486")]; tensor var_4489_begin_0 = const()[name = string("op_4489_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4489_end_mask_0 = const()[name = string("op_4489_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4489 = slice_by_index(begin = var_4489_begin_0, end = concat_11, end_mask = var_4489_end_mask_0, x = var_4486)[name = string("op_4489")]; tensor var_4491_begin_0 = const()[name = string("op_4491_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_4491_end_0 = const()[name = string("op_4491_end_0"), val = tensor([24, 1, 2, 2048, 128])]; tensor var_4491_end_mask_0 = const()[name = string("op_4491_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4491_squeeze_mask_0 = const()[name = string("op_4491_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_24_1 = cast(dtype = value_cache_internal_tensor_assign_24_dtype_0, x = coreml_update_state_119)[name = string("cast_49")]; tensor var_4491 = slice_by_index(begin = var_4491_begin_0, end = var_4491_end_0, end_mask = var_4491_end_mask_0, squeeze_mask = var_4491_squeeze_mask_0, x = value_cache_internal_tensor_assign_24_1)[name = string("op_4491")]; tensor var_4494_begin_0 = const()[name = string("op_4494_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4494_end_mask_0 = const()[name = string("op_4494_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4494 = slice_by_index(begin = var_4494_begin_0, end = concat_11, end_mask = var_4494_end_mask_0, x = var_4491)[name = string("op_4494")]; tensor var_4496_shape = shape(x = var_4489)[name = string("op_4496_shape")]; int32 gather_427 = const()[name = string("gather_427"), val = int32(1)]; int32 gather_428 = const()[name = string("gather_428"), val = int32(2)]; int32 select_429 = const()[name = string("select_429"), val = int32(2)]; int32 gather_429_axis_0 = const()[name = string("gather_429_axis_0"), val = int32(0)]; int32 gather_429_batch_dims_0 = const()[name = string("gather_429_batch_dims_0"), val = int32(0)]; bool gather_429_validate_indices_0 = const()[name = string("gather_429_validate_indices_0"), val = bool(false)]; int32 gather_429 = gather(axis = gather_429_axis_0, batch_dims = gather_429_batch_dims_0, indices = select_429, validate_indices = gather_429_validate_indices_0, x = var_4496_shape)[name = string("gather_429")]; int32 gather_430 = const()[name = string("gather_430"), val = int32(128)]; tensor var_4503_axes_0 = const()[name = string("op_4503_axes_0"), val = tensor([2])]; tensor var_4503 = expand_dims(axes = var_4503_axes_0, x = var_4489)[name = string("op_4503")]; tensor shape_477 = shape(x = var_4503)[name = string("shape_477")]; int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (gather_427, gather_428, var_100, gather_429, gather_430))[name = string("concat_450")]; tensor real_div_46 = real_div(x = concat_450, y = shape_477)[name = string("real_div_46")]; tensor hidden_states_701 = tile(reps = real_div_46, x = var_4503)[name = string("hidden_states_701")]; tensor concat_451x = const()[name = string("concat_451x"), val = tensor([1, 16, -1, 128])]; tensor key_states_95 = reshape(shape = concat_451x, x = hidden_states_701)[name = string("key_states_95")]; tensor var_4513_shape = shape(x = var_4494)[name = string("op_4513_shape")]; int32 gather_431 = const()[name = string("gather_431"), val = int32(1)]; int32 gather_432 = const()[name = string("gather_432"), val = int32(2)]; int32 select_433 = const()[name = string("select_433"), val = int32(2)]; int32 gather_433_axis_0 = const()[name = string("gather_433_axis_0"), val = int32(0)]; int32 gather_433_batch_dims_0 = const()[name = string("gather_433_batch_dims_0"), val = int32(0)]; bool gather_433_validate_indices_0 = const()[name = string("gather_433_validate_indices_0"), val = bool(false)]; int32 gather_433 = gather(axis = gather_433_axis_0, batch_dims = gather_433_batch_dims_0, indices = select_433, validate_indices = gather_433_validate_indices_0, x = var_4513_shape)[name = string("gather_433")]; int32 gather_434 = const()[name = string("gather_434"), val = int32(128)]; tensor var_4520_axes_0 = const()[name = string("op_4520_axes_0"), val = tensor([2])]; tensor var_4520 = expand_dims(axes = var_4520_axes_0, x = var_4494)[name = string("op_4520")]; tensor shape_482 = shape(x = var_4520)[name = string("shape_482")]; int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (gather_431, gather_432, var_100, gather_433, gather_434))[name = string("concat_452")]; tensor real_div_47 = real_div(x = concat_452, y = shape_482)[name = string("real_div_47")]; tensor hidden_states_705 = tile(reps = real_div_47, x = var_4520)[name = string("hidden_states_705")]; tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, 16, -1, 128])]; tensor value_states_95 = reshape(shape = concat_453x, x = hidden_states_705)[name = string("value_states_95")]; tensor var_4530_shape = shape(x = key_states_95)[name = string("op_4530_shape")]; int32 select_435 = const()[name = string("select_435"), val = int32(2)]; int32 gather_435_axis_0 = const()[name = string("gather_435_axis_0"), val = int32(0)]; int32 gather_435_batch_dims_0 = const()[name = string("gather_435_batch_dims_0"), val = int32(0)]; bool gather_435_validate_indices_0 = const()[name = string("gather_435_validate_indices_0"), val = bool(false)]; int32 gather_435 = gather(axis = gather_435_axis_0, batch_dims = gather_435_batch_dims_0, indices = select_435, validate_indices = gather_435_validate_indices_0, x = var_4530_shape)[name = string("gather_435")]; int32 concat_454_values0_0 = const()[name = string("concat_454_values0_0"), val = int32(1)]; int32 concat_454_values1_0 = const()[name = string("concat_454_values1_0"), val = int32(1)]; int32 concat_454_values2_0 = const()[name = string("concat_454_values2_0"), val = int32(0)]; int32 concat_454_axis_0 = const()[name = string("concat_454_axis_0"), val = int32(0)]; bool concat_454_interleave_0 = const()[name = string("concat_454_interleave_0"), val = bool(false)]; tensor concat_454 = concat(axis = concat_454_axis_0, interleave = concat_454_interleave_0, values = (concat_454_values0_0, concat_454_values1_0, concat_454_values2_0, gather_435))[name = string("concat_454")]; tensor causal_mask_49_begin_0 = const()[name = string("causal_mask_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_49_end_mask_0 = const()[name = string("causal_mask_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_49 = slice_by_index(begin = causal_mask_49_begin_0, end = concat_454, end_mask = causal_mask_49_end_mask_0, x = cast_0)[name = string("causal_mask_49")]; tensor attn_output_93 = scaled_dot_product_attention(attn_mask = causal_mask_49, key = key_states_95, query = query_states_95, value = value_states_95)[name = string("attn_output_93")]; tensor var_4536_perm_0 = const()[name = string("op_4536_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_455x = const()[name = string("concat_455x"), val = tensor([1, -1, 2048])]; tensor var_4536 = transpose(perm = var_4536_perm_0, x = attn_output_93)[name = string("transpose_48")]; tensor input_185 = reshape(shape = concat_455x, x = var_4536)[name = string("input_185")]; tensor linear_164 = linear(bias = linear_3_bias_0, weight = model_model_layers_23_self_attn_o_proj_weight_quantized, x = input_185)[name = string("linear_164")]; tensor hidden_states_709 = add(x = hidden_states_689, y = linear_164)[name = string("hidden_states_709")]; fp32 var_94_promoted_47 = const()[name = string("op_94_promoted_47"), val = fp32(0x1p+1)]; tensor var_4545 = pow(x = hidden_states_709, y = var_94_promoted_47)[name = string("op_4545")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_4545)[name = string("variance_95")]; fp32 var_4548 = const()[name = string("op_4548"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4549 = add(x = variance_95, y = var_4548)[name = string("op_4549")]; fp32 var_4550_epsilon_0 = const()[name = string("op_4550_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4550 = rsqrt(epsilon = var_4550_epsilon_0, x = var_4549)[name = string("op_4550")]; tensor hidden_states_713 = mul(x = hidden_states_709, y = var_4550)[name = string("hidden_states_713")]; tensor input_187 = mul(x = model_model_layers_23_post_attention_layernorm_weight, y = hidden_states_713)[name = string("input_187")]; tensor linear_165 = linear(bias = linear_4_bias_0, weight = model_model_layers_23_mlp_gate_proj_weight_quantized, x = input_187)[name = string("linear_165")]; tensor var_4562 = silu(x = linear_165)[name = string("op_4562")]; tensor linear_166 = linear(bias = linear_4_bias_0, weight = model_model_layers_23_mlp_up_proj_weight_quantized, x = input_187)[name = string("linear_166")]; tensor input_191 = mul(x = var_4562, y = linear_166)[name = string("input_191")]; tensor linear_167 = linear(bias = linear_3_bias_0, weight = model_model_layers_23_mlp_down_proj_weight_quantized, x = input_191)[name = string("linear_167")]; tensor hidden_states_719 = add(x = hidden_states_709, y = linear_167)[name = string("hidden_states_719")]; fp32 var_94_promoted_48 = const()[name = string("op_94_promoted_48"), val = fp32(0x1p+1)]; tensor var_4575 = pow(x = hidden_states_719, y = var_94_promoted_48)[name = string("op_4575")]; tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; tensor variance_97 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_4575)[name = string("variance_97")]; fp32 var_4578 = const()[name = string("op_4578"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4579 = add(x = variance_97, y = var_4578)[name = string("op_4579")]; fp32 var_4580_epsilon_0 = const()[name = string("op_4580_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4580 = rsqrt(epsilon = var_4580_epsilon_0, x = var_4579)[name = string("op_4580")]; tensor hidden_states_723 = mul(x = hidden_states_719, y = var_4580)[name = string("hidden_states_723")]; tensor hidden_states_727 = mul(x = model_model_layers_24_input_layernorm_weight, y = hidden_states_723)[name = string("hidden_states_727")]; tensor linear_168 = linear(bias = model_model_layers_24_self_attn_q_proj_bias, weight = model_model_layers_24_self_attn_q_proj_weight_quantized, x = hidden_states_727)[name = string("linear_168")]; tensor linear_169 = linear(bias = model_model_layers_24_self_attn_k_proj_bias, weight = model_model_layers_24_self_attn_k_proj_weight_quantized, x = hidden_states_727)[name = string("linear_169")]; tensor linear_170 = linear(bias = model_model_layers_24_self_attn_v_proj_bias, weight = model_model_layers_24_self_attn_v_proj_weight_quantized, x = hidden_states_727)[name = string("linear_170")]; tensor concat_456x = const()[name = string("concat_456x"), val = tensor([1, -1, 16, 128])]; tensor var_4603 = reshape(shape = concat_456x, x = linear_168)[name = string("op_4603")]; tensor q_49_perm_0 = const()[name = string("q_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 2, 128])]; tensor var_4606 = reshape(shape = concat_457x, x = linear_169)[name = string("op_4606")]; tensor k_49_perm_0 = const()[name = string("k_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_458x = const()[name = string("concat_458x"), val = tensor([1, -1, 2, 128])]; tensor var_4609 = reshape(shape = concat_458x, x = linear_170)[name = string("op_4609")]; tensor v_state_49_perm_0 = const()[name = string("v_state_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_49 = transpose(perm = q_49_perm_0, x = var_4603)[name = string("transpose_47")]; tensor var_4613 = mul(x = q_49, y = cos_7)[name = string("op_4613")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49)[name = string("x1_97")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49)[name = string("x2_97")]; fp32 const_51_promoted = const()[name = string("const_51_promoted"), val = fp32(-0x1p+0)]; tensor var_4624 = mul(x = x2_97, y = const_51_promoted)[name = string("op_4624")]; bool var_4626_interleave_0 = const()[name = string("op_4626_interleave_0"), val = bool(false)]; tensor var_4626 = concat(axis = var_88, interleave = var_4626_interleave_0, values = (var_4624, x1_97))[name = string("op_4626")]; tensor var_4627 = mul(x = var_4626, y = sin_7)[name = string("op_4627")]; tensor query_states_99 = add(x = var_4613, y = var_4627)[name = string("query_states_99")]; tensor k_49 = transpose(perm = k_49_perm_0, x = var_4606)[name = string("transpose_46")]; tensor var_4629 = mul(x = k_49, y = cos_7)[name = string("op_4629")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49)[name = string("x1_99")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49)[name = string("x2_99")]; fp32 const_52_promoted = const()[name = string("const_52_promoted"), val = fp32(-0x1p+0)]; tensor var_4640 = mul(x = x2_99, y = const_52_promoted)[name = string("op_4640")]; bool var_4642_interleave_0 = const()[name = string("op_4642_interleave_0"), val = bool(false)]; tensor var_4642 = concat(axis = var_88, interleave = var_4642_interleave_0, values = (var_4640, x1_99))[name = string("op_4642")]; tensor var_4643 = mul(x = var_4642, y = sin_7)[name = string("op_4643")]; tensor k_state_49 = add(x = var_4629, y = var_4643)[name = string("k_state_49")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; tensor concat_461_values0_0 = const()[name = string("concat_461_values0_0"), val = tensor([24])]; int32 concat_461_axis_0 = const()[name = string("concat_461_axis_0"), val = int32(0)]; bool concat_461_interleave_0 = const()[name = string("concat_461_interleave_0"), val = bool(false)]; tensor concat_461 = concat(axis = concat_461_axis_0, interleave = concat_461_interleave_0, values = (concat_461_values0_0, expand_dims_288, expand_dims_289, expand_dims_2, expand_dims_291))[name = string("concat_461")]; tensor key_cache_internal_tensor_assign_25_stride_0 = const()[name = string("key_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_25 = slice_update(begin = concat_461, begin_mask = key_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_25_squeeze_mask_0, stride = key_cache_internal_tensor_assign_25_stride_0, update = k_state_49, x = key_cache_internal_tensor_assign_24_1)[name = string("key_cache_internal_tensor_assign_25")]; string cast_349_dtype_0 = const()[name = string("cast_349_dtype_0"), val = string("fp16")]; tensor cast_349 = cast(dtype = cast_349_dtype_0, x = key_cache_internal_tensor_assign_25)[name = string("cast_48")]; write_state(data = cast_349, input = key_cache)[name = string("coreml_update_state_120_write_state")]; tensor coreml_update_state_120 = read_state(input = key_cache)[name = string("coreml_update_state_120")]; string key_cache_internal_tensor_assign_25_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_25_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_25_stride_0 = const()[name = string("value_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_49 = transpose(perm = v_state_49_perm_0, x = var_4609)[name = string("transpose_45")]; tensor value_cache_internal_tensor_assign_25 = slice_update(begin = concat_461, begin_mask = value_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_25_squeeze_mask_0, stride = value_cache_internal_tensor_assign_25_stride_0, update = v_state_49, x = value_cache_internal_tensor_assign_24_1)[name = string("value_cache_internal_tensor_assign_25")]; string cast_350_dtype_0 = const()[name = string("cast_350_dtype_0"), val = string("fp16")]; tensor cast_350 = cast(dtype = cast_350_dtype_0, x = value_cache_internal_tensor_assign_25)[name = string("cast_47")]; write_state(data = cast_350, input = value_cache)[name = string("coreml_update_state_121_write_state")]; tensor coreml_update_state_121 = read_state(input = value_cache)[name = string("coreml_update_state_121")]; string value_cache_internal_tensor_assign_25_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_25_dtype_0"), val = string("fp32")]; tensor var_4666_begin_0 = const()[name = string("op_4666_begin_0"), val = tensor([24, 0, 0, 0, 0])]; tensor var_4666_end_0 = const()[name = string("op_4666_end_0"), val = tensor([25, 1, 2, 2048, 128])]; tensor var_4666_end_mask_0 = const()[name = string("op_4666_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4666_squeeze_mask_0 = const()[name = string("op_4666_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_25_1 = cast(dtype = key_cache_internal_tensor_assign_25_dtype_0, x = coreml_update_state_120)[name = string("cast_46")]; tensor var_4666 = slice_by_index(begin = var_4666_begin_0, end = var_4666_end_0, end_mask = var_4666_end_mask_0, squeeze_mask = var_4666_squeeze_mask_0, x = key_cache_internal_tensor_assign_25_1)[name = string("op_4666")]; tensor var_4669_begin_0 = const()[name = string("op_4669_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4669_end_mask_0 = const()[name = string("op_4669_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4669 = slice_by_index(begin = var_4669_begin_0, end = concat_11, end_mask = var_4669_end_mask_0, x = var_4666)[name = string("op_4669")]; tensor var_4671_begin_0 = const()[name = string("op_4671_begin_0"), val = tensor([24, 0, 0, 0, 0])]; tensor var_4671_end_0 = const()[name = string("op_4671_end_0"), val = tensor([25, 1, 2, 2048, 128])]; tensor var_4671_end_mask_0 = const()[name = string("op_4671_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4671_squeeze_mask_0 = const()[name = string("op_4671_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_25_1 = cast(dtype = value_cache_internal_tensor_assign_25_dtype_0, x = coreml_update_state_121)[name = string("cast_45")]; tensor var_4671 = slice_by_index(begin = var_4671_begin_0, end = var_4671_end_0, end_mask = var_4671_end_mask_0, squeeze_mask = var_4671_squeeze_mask_0, x = value_cache_internal_tensor_assign_25_1)[name = string("op_4671")]; tensor var_4674_begin_0 = const()[name = string("op_4674_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4674_end_mask_0 = const()[name = string("op_4674_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4674 = slice_by_index(begin = var_4674_begin_0, end = concat_11, end_mask = var_4674_end_mask_0, x = var_4671)[name = string("op_4674")]; tensor var_4676_shape = shape(x = var_4669)[name = string("op_4676_shape")]; int32 gather_445 = const()[name = string("gather_445"), val = int32(1)]; int32 gather_446 = const()[name = string("gather_446"), val = int32(2)]; int32 select_447 = const()[name = string("select_447"), val = int32(2)]; int32 gather_447_axis_0 = const()[name = string("gather_447_axis_0"), val = int32(0)]; int32 gather_447_batch_dims_0 = const()[name = string("gather_447_batch_dims_0"), val = int32(0)]; bool gather_447_validate_indices_0 = const()[name = string("gather_447_validate_indices_0"), val = bool(false)]; int32 gather_447 = gather(axis = gather_447_axis_0, batch_dims = gather_447_batch_dims_0, indices = select_447, validate_indices = gather_447_validate_indices_0, x = var_4676_shape)[name = string("gather_447")]; int32 gather_448 = const()[name = string("gather_448"), val = int32(128)]; tensor var_4683_axes_0 = const()[name = string("op_4683_axes_0"), val = tensor([2])]; tensor var_4683 = expand_dims(axes = var_4683_axes_0, x = var_4669)[name = string("op_4683")]; tensor shape_497 = shape(x = var_4683)[name = string("shape_497")]; int32 concat_469_axis_0 = const()[name = string("concat_469_axis_0"), val = int32(0)]; bool concat_469_interleave_0 = const()[name = string("concat_469_interleave_0"), val = bool(false)]; tensor concat_469 = concat(axis = concat_469_axis_0, interleave = concat_469_interleave_0, values = (gather_445, gather_446, var_100, gather_447, gather_448))[name = string("concat_469")]; tensor real_div_48 = real_div(x = concat_469, y = shape_497)[name = string("real_div_48")]; tensor hidden_states_731 = tile(reps = real_div_48, x = var_4683)[name = string("hidden_states_731")]; tensor concat_470x = const()[name = string("concat_470x"), val = tensor([1, 16, -1, 128])]; tensor key_states_99 = reshape(shape = concat_470x, x = hidden_states_731)[name = string("key_states_99")]; tensor var_4693_shape = shape(x = var_4674)[name = string("op_4693_shape")]; int32 gather_449 = const()[name = string("gather_449"), val = int32(1)]; int32 gather_450 = const()[name = string("gather_450"), val = int32(2)]; int32 select_451 = const()[name = string("select_451"), val = int32(2)]; int32 gather_451_axis_0 = const()[name = string("gather_451_axis_0"), val = int32(0)]; int32 gather_451_batch_dims_0 = const()[name = string("gather_451_batch_dims_0"), val = int32(0)]; bool gather_451_validate_indices_0 = const()[name = string("gather_451_validate_indices_0"), val = bool(false)]; int32 gather_451 = gather(axis = gather_451_axis_0, batch_dims = gather_451_batch_dims_0, indices = select_451, validate_indices = gather_451_validate_indices_0, x = var_4693_shape)[name = string("gather_451")]; int32 gather_452 = const()[name = string("gather_452"), val = int32(128)]; tensor var_4700_axes_0 = const()[name = string("op_4700_axes_0"), val = tensor([2])]; tensor var_4700 = expand_dims(axes = var_4700_axes_0, x = var_4674)[name = string("op_4700")]; tensor shape_502 = shape(x = var_4700)[name = string("shape_502")]; int32 concat_471_axis_0 = const()[name = string("concat_471_axis_0"), val = int32(0)]; bool concat_471_interleave_0 = const()[name = string("concat_471_interleave_0"), val = bool(false)]; tensor concat_471 = concat(axis = concat_471_axis_0, interleave = concat_471_interleave_0, values = (gather_449, gather_450, var_100, gather_451, gather_452))[name = string("concat_471")]; tensor real_div_49 = real_div(x = concat_471, y = shape_502)[name = string("real_div_49")]; tensor hidden_states_735 = tile(reps = real_div_49, x = var_4700)[name = string("hidden_states_735")]; tensor concat_472x = const()[name = string("concat_472x"), val = tensor([1, 16, -1, 128])]; tensor value_states_99 = reshape(shape = concat_472x, x = hidden_states_735)[name = string("value_states_99")]; tensor var_4710_shape = shape(x = key_states_99)[name = string("op_4710_shape")]; int32 select_453 = const()[name = string("select_453"), val = int32(2)]; int32 gather_453_axis_0 = const()[name = string("gather_453_axis_0"), val = int32(0)]; int32 gather_453_batch_dims_0 = const()[name = string("gather_453_batch_dims_0"), val = int32(0)]; bool gather_453_validate_indices_0 = const()[name = string("gather_453_validate_indices_0"), val = bool(false)]; int32 gather_453 = gather(axis = gather_453_axis_0, batch_dims = gather_453_batch_dims_0, indices = select_453, validate_indices = gather_453_validate_indices_0, x = var_4710_shape)[name = string("gather_453")]; int32 concat_473_values0_0 = const()[name = string("concat_473_values0_0"), val = int32(1)]; int32 concat_473_values1_0 = const()[name = string("concat_473_values1_0"), val = int32(1)]; int32 concat_473_values2_0 = const()[name = string("concat_473_values2_0"), val = int32(0)]; int32 concat_473_axis_0 = const()[name = string("concat_473_axis_0"), val = int32(0)]; bool concat_473_interleave_0 = const()[name = string("concat_473_interleave_0"), val = bool(false)]; tensor concat_473 = concat(axis = concat_473_axis_0, interleave = concat_473_interleave_0, values = (concat_473_values0_0, concat_473_values1_0, concat_473_values2_0, gather_453))[name = string("concat_473")]; tensor causal_mask_51_begin_0 = const()[name = string("causal_mask_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_51_end_mask_0 = const()[name = string("causal_mask_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_51 = slice_by_index(begin = causal_mask_51_begin_0, end = concat_473, end_mask = causal_mask_51_end_mask_0, x = cast_0)[name = string("causal_mask_51")]; tensor attn_output_97 = scaled_dot_product_attention(attn_mask = causal_mask_51, key = key_states_99, query = query_states_99, value = value_states_99)[name = string("attn_output_97")]; tensor var_4716_perm_0 = const()[name = string("op_4716_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 2048])]; tensor var_4716 = transpose(perm = var_4716_perm_0, x = attn_output_97)[name = string("transpose_44")]; tensor input_193 = reshape(shape = concat_474x, x = var_4716)[name = string("input_193")]; tensor linear_171 = linear(bias = linear_3_bias_0, weight = model_model_layers_24_self_attn_o_proj_weight_quantized, x = input_193)[name = string("linear_171")]; tensor hidden_states_739 = add(x = hidden_states_719, y = linear_171)[name = string("hidden_states_739")]; fp32 var_94_promoted_49 = const()[name = string("op_94_promoted_49"), val = fp32(0x1p+1)]; tensor var_4725 = pow(x = hidden_states_739, y = var_94_promoted_49)[name = string("op_4725")]; tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; tensor variance_99 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_4725)[name = string("variance_99")]; fp32 var_4728 = const()[name = string("op_4728"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4729 = add(x = variance_99, y = var_4728)[name = string("op_4729")]; fp32 var_4730_epsilon_0 = const()[name = string("op_4730_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4730 = rsqrt(epsilon = var_4730_epsilon_0, x = var_4729)[name = string("op_4730")]; tensor hidden_states_743 = mul(x = hidden_states_739, y = var_4730)[name = string("hidden_states_743")]; tensor input_195 = mul(x = model_model_layers_24_post_attention_layernorm_weight, y = hidden_states_743)[name = string("input_195")]; tensor linear_172 = linear(bias = linear_4_bias_0, weight = model_model_layers_24_mlp_gate_proj_weight_quantized, x = input_195)[name = string("linear_172")]; tensor var_4742 = silu(x = linear_172)[name = string("op_4742")]; tensor linear_173 = linear(bias = linear_4_bias_0, weight = model_model_layers_24_mlp_up_proj_weight_quantized, x = input_195)[name = string("linear_173")]; tensor input_199 = mul(x = var_4742, y = linear_173)[name = string("input_199")]; tensor linear_174 = linear(bias = linear_3_bias_0, weight = model_model_layers_24_mlp_down_proj_weight_quantized, x = input_199)[name = string("linear_174")]; tensor hidden_states_749 = add(x = hidden_states_739, y = linear_174)[name = string("hidden_states_749")]; fp32 var_94_promoted_50 = const()[name = string("op_94_promoted_50"), val = fp32(0x1p+1)]; tensor var_4755 = pow(x = hidden_states_749, y = var_94_promoted_50)[name = string("op_4755")]; tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; tensor variance_101 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_4755)[name = string("variance_101")]; fp32 var_4758 = const()[name = string("op_4758"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4759 = add(x = variance_101, y = var_4758)[name = string("op_4759")]; fp32 var_4760_epsilon_0 = const()[name = string("op_4760_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4760 = rsqrt(epsilon = var_4760_epsilon_0, x = var_4759)[name = string("op_4760")]; tensor hidden_states_753 = mul(x = hidden_states_749, y = var_4760)[name = string("hidden_states_753")]; tensor hidden_states_757 = mul(x = model_model_layers_25_input_layernorm_weight, y = hidden_states_753)[name = string("hidden_states_757")]; tensor linear_175 = linear(bias = model_model_layers_25_self_attn_q_proj_bias, weight = model_model_layers_25_self_attn_q_proj_weight_quantized, x = hidden_states_757)[name = string("linear_175")]; tensor linear_176 = linear(bias = model_model_layers_25_self_attn_k_proj_bias, weight = model_model_layers_25_self_attn_k_proj_weight_quantized, x = hidden_states_757)[name = string("linear_176")]; tensor linear_177 = linear(bias = model_model_layers_25_self_attn_v_proj_bias, weight = model_model_layers_25_self_attn_v_proj_weight_quantized, x = hidden_states_757)[name = string("linear_177")]; tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 16, 128])]; tensor var_4783 = reshape(shape = concat_475x, x = linear_175)[name = string("op_4783")]; tensor q_51_perm_0 = const()[name = string("q_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 2, 128])]; tensor var_4786 = reshape(shape = concat_476x, x = linear_176)[name = string("op_4786")]; tensor k_51_perm_0 = const()[name = string("k_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_477x = const()[name = string("concat_477x"), val = tensor([1, -1, 2, 128])]; tensor var_4789 = reshape(shape = concat_477x, x = linear_177)[name = string("op_4789")]; tensor v_state_51_perm_0 = const()[name = string("v_state_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_51 = transpose(perm = q_51_perm_0, x = var_4783)[name = string("transpose_43")]; tensor var_4793 = mul(x = q_51, y = cos_7)[name = string("op_4793")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51)[name = string("x1_101")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51)[name = string("x2_101")]; fp32 const_53_promoted = const()[name = string("const_53_promoted"), val = fp32(-0x1p+0)]; tensor var_4804 = mul(x = x2_101, y = const_53_promoted)[name = string("op_4804")]; bool var_4806_interleave_0 = const()[name = string("op_4806_interleave_0"), val = bool(false)]; tensor var_4806 = concat(axis = var_88, interleave = var_4806_interleave_0, values = (var_4804, x1_101))[name = string("op_4806")]; tensor var_4807 = mul(x = var_4806, y = sin_7)[name = string("op_4807")]; tensor query_states_103 = add(x = var_4793, y = var_4807)[name = string("query_states_103")]; tensor k_51 = transpose(perm = k_51_perm_0, x = var_4786)[name = string("transpose_42")]; tensor var_4809 = mul(x = k_51, y = cos_7)[name = string("op_4809")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51)[name = string("x1_103")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51)[name = string("x2_103")]; fp32 const_54_promoted = const()[name = string("const_54_promoted"), val = fp32(-0x1p+0)]; tensor var_4820 = mul(x = x2_103, y = const_54_promoted)[name = string("op_4820")]; bool var_4822_interleave_0 = const()[name = string("op_4822_interleave_0"), val = bool(false)]; tensor var_4822 = concat(axis = var_88, interleave = var_4822_interleave_0, values = (var_4820, x1_103))[name = string("op_4822")]; tensor var_4823 = mul(x = var_4822, y = sin_7)[name = string("op_4823")]; tensor k_state_51 = add(x = var_4809, y = var_4823)[name = string("k_state_51")]; tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([0])]; tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; tensor concat_480_values0_0 = const()[name = string("concat_480_values0_0"), val = tensor([25])]; int32 concat_480_axis_0 = const()[name = string("concat_480_axis_0"), val = int32(0)]; bool concat_480_interleave_0 = const()[name = string("concat_480_interleave_0"), val = bool(false)]; tensor concat_480 = concat(axis = concat_480_axis_0, interleave = concat_480_interleave_0, values = (concat_480_values0_0, expand_dims_300, expand_dims_301, expand_dims_2, expand_dims_303))[name = string("concat_480")]; tensor key_cache_internal_tensor_assign_26_stride_0 = const()[name = string("key_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_26 = slice_update(begin = concat_480, begin_mask = key_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_26_squeeze_mask_0, stride = key_cache_internal_tensor_assign_26_stride_0, update = k_state_51, x = key_cache_internal_tensor_assign_25_1)[name = string("key_cache_internal_tensor_assign_26")]; string cast_363_dtype_0 = const()[name = string("cast_363_dtype_0"), val = string("fp16")]; tensor cast_363 = cast(dtype = cast_363_dtype_0, x = key_cache_internal_tensor_assign_26)[name = string("cast_44")]; write_state(data = cast_363, input = key_cache)[name = string("coreml_update_state_122_write_state")]; tensor coreml_update_state_122 = read_state(input = key_cache)[name = string("coreml_update_state_122")]; string key_cache_internal_tensor_assign_26_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_26_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_26_stride_0 = const()[name = string("value_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_51 = transpose(perm = v_state_51_perm_0, x = var_4789)[name = string("transpose_41")]; tensor value_cache_internal_tensor_assign_26 = slice_update(begin = concat_480, begin_mask = value_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_26_squeeze_mask_0, stride = value_cache_internal_tensor_assign_26_stride_0, update = v_state_51, x = value_cache_internal_tensor_assign_25_1)[name = string("value_cache_internal_tensor_assign_26")]; string cast_364_dtype_0 = const()[name = string("cast_364_dtype_0"), val = string("fp16")]; tensor cast_364 = cast(dtype = cast_364_dtype_0, x = value_cache_internal_tensor_assign_26)[name = string("cast_43")]; write_state(data = cast_364, input = value_cache)[name = string("coreml_update_state_123_write_state")]; tensor coreml_update_state_123 = read_state(input = value_cache)[name = string("coreml_update_state_123")]; string value_cache_internal_tensor_assign_26_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_26_dtype_0"), val = string("fp32")]; tensor var_4846_begin_0 = const()[name = string("op_4846_begin_0"), val = tensor([25, 0, 0, 0, 0])]; tensor var_4846_end_0 = const()[name = string("op_4846_end_0"), val = tensor([26, 1, 2, 2048, 128])]; tensor var_4846_end_mask_0 = const()[name = string("op_4846_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4846_squeeze_mask_0 = const()[name = string("op_4846_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_26_1 = cast(dtype = key_cache_internal_tensor_assign_26_dtype_0, x = coreml_update_state_122)[name = string("cast_42")]; tensor var_4846 = slice_by_index(begin = var_4846_begin_0, end = var_4846_end_0, end_mask = var_4846_end_mask_0, squeeze_mask = var_4846_squeeze_mask_0, x = key_cache_internal_tensor_assign_26_1)[name = string("op_4846")]; tensor var_4849_begin_0 = const()[name = string("op_4849_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4849_end_mask_0 = const()[name = string("op_4849_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4849 = slice_by_index(begin = var_4849_begin_0, end = concat_11, end_mask = var_4849_end_mask_0, x = var_4846)[name = string("op_4849")]; tensor var_4851_begin_0 = const()[name = string("op_4851_begin_0"), val = tensor([25, 0, 0, 0, 0])]; tensor var_4851_end_0 = const()[name = string("op_4851_end_0"), val = tensor([26, 1, 2, 2048, 128])]; tensor var_4851_end_mask_0 = const()[name = string("op_4851_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4851_squeeze_mask_0 = const()[name = string("op_4851_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_26_1 = cast(dtype = value_cache_internal_tensor_assign_26_dtype_0, x = coreml_update_state_123)[name = string("cast_41")]; tensor var_4851 = slice_by_index(begin = var_4851_begin_0, end = var_4851_end_0, end_mask = var_4851_end_mask_0, squeeze_mask = var_4851_squeeze_mask_0, x = value_cache_internal_tensor_assign_26_1)[name = string("op_4851")]; tensor var_4854_begin_0 = const()[name = string("op_4854_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4854_end_mask_0 = const()[name = string("op_4854_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4854 = slice_by_index(begin = var_4854_begin_0, end = concat_11, end_mask = var_4854_end_mask_0, x = var_4851)[name = string("op_4854")]; tensor var_4856_shape = shape(x = var_4849)[name = string("op_4856_shape")]; int32 gather_463 = const()[name = string("gather_463"), val = int32(1)]; int32 gather_464 = const()[name = string("gather_464"), val = int32(2)]; int32 select_465 = const()[name = string("select_465"), val = int32(2)]; int32 gather_465_axis_0 = const()[name = string("gather_465_axis_0"), val = int32(0)]; int32 gather_465_batch_dims_0 = const()[name = string("gather_465_batch_dims_0"), val = int32(0)]; bool gather_465_validate_indices_0 = const()[name = string("gather_465_validate_indices_0"), val = bool(false)]; int32 gather_465 = gather(axis = gather_465_axis_0, batch_dims = gather_465_batch_dims_0, indices = select_465, validate_indices = gather_465_validate_indices_0, x = var_4856_shape)[name = string("gather_465")]; int32 gather_466 = const()[name = string("gather_466"), val = int32(128)]; tensor var_4863_axes_0 = const()[name = string("op_4863_axes_0"), val = tensor([2])]; tensor var_4863 = expand_dims(axes = var_4863_axes_0, x = var_4849)[name = string("op_4863")]; tensor shape_517 = shape(x = var_4863)[name = string("shape_517")]; int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (gather_463, gather_464, var_100, gather_465, gather_466))[name = string("concat_488")]; tensor real_div_50 = real_div(x = concat_488, y = shape_517)[name = string("real_div_50")]; tensor hidden_states_761 = tile(reps = real_div_50, x = var_4863)[name = string("hidden_states_761")]; tensor concat_489x = const()[name = string("concat_489x"), val = tensor([1, 16, -1, 128])]; tensor key_states_103 = reshape(shape = concat_489x, x = hidden_states_761)[name = string("key_states_103")]; tensor var_4873_shape = shape(x = var_4854)[name = string("op_4873_shape")]; int32 gather_467 = const()[name = string("gather_467"), val = int32(1)]; int32 gather_468 = const()[name = string("gather_468"), val = int32(2)]; int32 select_469 = const()[name = string("select_469"), val = int32(2)]; int32 gather_469_axis_0 = const()[name = string("gather_469_axis_0"), val = int32(0)]; int32 gather_469_batch_dims_0 = const()[name = string("gather_469_batch_dims_0"), val = int32(0)]; bool gather_469_validate_indices_0 = const()[name = string("gather_469_validate_indices_0"), val = bool(false)]; int32 gather_469 = gather(axis = gather_469_axis_0, batch_dims = gather_469_batch_dims_0, indices = select_469, validate_indices = gather_469_validate_indices_0, x = var_4873_shape)[name = string("gather_469")]; int32 gather_470 = const()[name = string("gather_470"), val = int32(128)]; tensor var_4880_axes_0 = const()[name = string("op_4880_axes_0"), val = tensor([2])]; tensor var_4880 = expand_dims(axes = var_4880_axes_0, x = var_4854)[name = string("op_4880")]; tensor shape_522 = shape(x = var_4880)[name = string("shape_522")]; int32 concat_490_axis_0 = const()[name = string("concat_490_axis_0"), val = int32(0)]; bool concat_490_interleave_0 = const()[name = string("concat_490_interleave_0"), val = bool(false)]; tensor concat_490 = concat(axis = concat_490_axis_0, interleave = concat_490_interleave_0, values = (gather_467, gather_468, var_100, gather_469, gather_470))[name = string("concat_490")]; tensor real_div_51 = real_div(x = concat_490, y = shape_522)[name = string("real_div_51")]; tensor hidden_states_765 = tile(reps = real_div_51, x = var_4880)[name = string("hidden_states_765")]; tensor concat_491x = const()[name = string("concat_491x"), val = tensor([1, 16, -1, 128])]; tensor value_states_103 = reshape(shape = concat_491x, x = hidden_states_765)[name = string("value_states_103")]; tensor var_4890_shape = shape(x = key_states_103)[name = string("op_4890_shape")]; int32 select_471 = const()[name = string("select_471"), val = int32(2)]; int32 gather_471_axis_0 = const()[name = string("gather_471_axis_0"), val = int32(0)]; int32 gather_471_batch_dims_0 = const()[name = string("gather_471_batch_dims_0"), val = int32(0)]; bool gather_471_validate_indices_0 = const()[name = string("gather_471_validate_indices_0"), val = bool(false)]; int32 gather_471 = gather(axis = gather_471_axis_0, batch_dims = gather_471_batch_dims_0, indices = select_471, validate_indices = gather_471_validate_indices_0, x = var_4890_shape)[name = string("gather_471")]; int32 concat_492_values0_0 = const()[name = string("concat_492_values0_0"), val = int32(1)]; int32 concat_492_values1_0 = const()[name = string("concat_492_values1_0"), val = int32(1)]; int32 concat_492_values2_0 = const()[name = string("concat_492_values2_0"), val = int32(0)]; int32 concat_492_axis_0 = const()[name = string("concat_492_axis_0"), val = int32(0)]; bool concat_492_interleave_0 = const()[name = string("concat_492_interleave_0"), val = bool(false)]; tensor concat_492 = concat(axis = concat_492_axis_0, interleave = concat_492_interleave_0, values = (concat_492_values0_0, concat_492_values1_0, concat_492_values2_0, gather_471))[name = string("concat_492")]; tensor causal_mask_53_begin_0 = const()[name = string("causal_mask_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_53_end_mask_0 = const()[name = string("causal_mask_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_53 = slice_by_index(begin = causal_mask_53_begin_0, end = concat_492, end_mask = causal_mask_53_end_mask_0, x = cast_0)[name = string("causal_mask_53")]; tensor attn_output_101 = scaled_dot_product_attention(attn_mask = causal_mask_53, key = key_states_103, query = query_states_103, value = value_states_103)[name = string("attn_output_101")]; tensor var_4896_perm_0 = const()[name = string("op_4896_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_493x = const()[name = string("concat_493x"), val = tensor([1, -1, 2048])]; tensor var_4896 = transpose(perm = var_4896_perm_0, x = attn_output_101)[name = string("transpose_40")]; tensor input_201 = reshape(shape = concat_493x, x = var_4896)[name = string("input_201")]; tensor linear_178 = linear(bias = linear_3_bias_0, weight = model_model_layers_25_self_attn_o_proj_weight_quantized, x = input_201)[name = string("linear_178")]; tensor hidden_states_769 = add(x = hidden_states_749, y = linear_178)[name = string("hidden_states_769")]; fp32 var_94_promoted_51 = const()[name = string("op_94_promoted_51"), val = fp32(0x1p+1)]; tensor var_4905 = pow(x = hidden_states_769, y = var_94_promoted_51)[name = string("op_4905")]; tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; tensor variance_103 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_4905)[name = string("variance_103")]; fp32 var_4908 = const()[name = string("op_4908"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4909 = add(x = variance_103, y = var_4908)[name = string("op_4909")]; fp32 var_4910_epsilon_0 = const()[name = string("op_4910_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4910 = rsqrt(epsilon = var_4910_epsilon_0, x = var_4909)[name = string("op_4910")]; tensor hidden_states_773 = mul(x = hidden_states_769, y = var_4910)[name = string("hidden_states_773")]; tensor input_203 = mul(x = model_model_layers_25_post_attention_layernorm_weight, y = hidden_states_773)[name = string("input_203")]; tensor linear_179 = linear(bias = linear_4_bias_0, weight = model_model_layers_25_mlp_gate_proj_weight_quantized, x = input_203)[name = string("linear_179")]; tensor var_4922 = silu(x = linear_179)[name = string("op_4922")]; tensor linear_180 = linear(bias = linear_4_bias_0, weight = model_model_layers_25_mlp_up_proj_weight_quantized, x = input_203)[name = string("linear_180")]; tensor input_207 = mul(x = var_4922, y = linear_180)[name = string("input_207")]; tensor linear_181 = linear(bias = linear_3_bias_0, weight = model_model_layers_25_mlp_down_proj_weight_quantized, x = input_207)[name = string("linear_181")]; tensor hidden_states_779 = add(x = hidden_states_769, y = linear_181)[name = string("hidden_states_779")]; fp32 var_94_promoted_52 = const()[name = string("op_94_promoted_52"), val = fp32(0x1p+1)]; tensor var_4935 = pow(x = hidden_states_779, y = var_94_promoted_52)[name = string("op_4935")]; tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; tensor variance_105 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_4935)[name = string("variance_105")]; fp32 var_4938 = const()[name = string("op_4938"), val = fp32(0x1.0c6f7ap-20)]; tensor var_4939 = add(x = variance_105, y = var_4938)[name = string("op_4939")]; fp32 var_4940_epsilon_0 = const()[name = string("op_4940_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4940 = rsqrt(epsilon = var_4940_epsilon_0, x = var_4939)[name = string("op_4940")]; tensor hidden_states_783 = mul(x = hidden_states_779, y = var_4940)[name = string("hidden_states_783")]; tensor hidden_states_787 = mul(x = model_model_layers_26_input_layernorm_weight, y = hidden_states_783)[name = string("hidden_states_787")]; tensor linear_182 = linear(bias = model_model_layers_26_self_attn_q_proj_bias, weight = model_model_layers_26_self_attn_q_proj_weight_quantized, x = hidden_states_787)[name = string("linear_182")]; tensor linear_183 = linear(bias = model_model_layers_26_self_attn_k_proj_bias, weight = model_model_layers_26_self_attn_k_proj_weight_quantized, x = hidden_states_787)[name = string("linear_183")]; tensor linear_184 = linear(bias = model_model_layers_26_self_attn_v_proj_bias, weight = model_model_layers_26_self_attn_v_proj_weight_quantized, x = hidden_states_787)[name = string("linear_184")]; tensor concat_494x = const()[name = string("concat_494x"), val = tensor([1, -1, 16, 128])]; tensor var_4963 = reshape(shape = concat_494x, x = linear_182)[name = string("op_4963")]; tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_495x = const()[name = string("concat_495x"), val = tensor([1, -1, 2, 128])]; tensor var_4966 = reshape(shape = concat_495x, x = linear_183)[name = string("op_4966")]; tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 2, 128])]; tensor var_4969 = reshape(shape = concat_496x, x = linear_184)[name = string("op_4969")]; tensor v_state_53_perm_0 = const()[name = string("v_state_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_53 = transpose(perm = q_53_perm_0, x = var_4963)[name = string("transpose_39")]; tensor var_4973 = mul(x = q_53, y = cos_7)[name = string("op_4973")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53)[name = string("x1_105")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53)[name = string("x2_105")]; fp32 const_55_promoted = const()[name = string("const_55_promoted"), val = fp32(-0x1p+0)]; tensor var_4984 = mul(x = x2_105, y = const_55_promoted)[name = string("op_4984")]; bool var_4986_interleave_0 = const()[name = string("op_4986_interleave_0"), val = bool(false)]; tensor var_4986 = concat(axis = var_88, interleave = var_4986_interleave_0, values = (var_4984, x1_105))[name = string("op_4986")]; tensor var_4987 = mul(x = var_4986, y = sin_7)[name = string("op_4987")]; tensor query_states_107 = add(x = var_4973, y = var_4987)[name = string("query_states_107")]; tensor k_53 = transpose(perm = k_53_perm_0, x = var_4966)[name = string("transpose_38")]; tensor var_4989 = mul(x = k_53, y = cos_7)[name = string("op_4989")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53)[name = string("x1_107")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53)[name = string("x2_107")]; fp32 const_56_promoted = const()[name = string("const_56_promoted"), val = fp32(-0x1p+0)]; tensor var_5000 = mul(x = x2_107, y = const_56_promoted)[name = string("op_5000")]; bool var_5002_interleave_0 = const()[name = string("op_5002_interleave_0"), val = bool(false)]; tensor var_5002 = concat(axis = var_88, interleave = var_5002_interleave_0, values = (var_5000, x1_107))[name = string("op_5002")]; tensor var_5003 = mul(x = var_5002, y = sin_7)[name = string("op_5003")]; tensor k_state_53 = add(x = var_4989, y = var_5003)[name = string("k_state_53")]; tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([0])]; tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; tensor concat_499_values0_0 = const()[name = string("concat_499_values0_0"), val = tensor([26])]; int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (concat_499_values0_0, expand_dims_312, expand_dims_313, expand_dims_2, expand_dims_315))[name = string("concat_499")]; tensor key_cache_internal_tensor_assign_27_stride_0 = const()[name = string("key_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_27 = slice_update(begin = concat_499, begin_mask = key_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_27_squeeze_mask_0, stride = key_cache_internal_tensor_assign_27_stride_0, update = k_state_53, x = key_cache_internal_tensor_assign_26_1)[name = string("key_cache_internal_tensor_assign_27")]; string cast_377_dtype_0 = const()[name = string("cast_377_dtype_0"), val = string("fp16")]; tensor cast_377 = cast(dtype = cast_377_dtype_0, x = key_cache_internal_tensor_assign_27)[name = string("cast_40")]; write_state(data = cast_377, input = key_cache)[name = string("coreml_update_state_124_write_state")]; tensor coreml_update_state_124 = read_state(input = key_cache)[name = string("coreml_update_state_124")]; string key_cache_internal_tensor_assign_27_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_27_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_27_stride_0 = const()[name = string("value_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_53 = transpose(perm = v_state_53_perm_0, x = var_4969)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_27 = slice_update(begin = concat_499, begin_mask = value_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_27_squeeze_mask_0, stride = value_cache_internal_tensor_assign_27_stride_0, update = v_state_53, x = value_cache_internal_tensor_assign_26_1)[name = string("value_cache_internal_tensor_assign_27")]; string cast_378_dtype_0 = const()[name = string("cast_378_dtype_0"), val = string("fp16")]; tensor cast_378 = cast(dtype = cast_378_dtype_0, x = value_cache_internal_tensor_assign_27)[name = string("cast_39")]; write_state(data = cast_378, input = value_cache)[name = string("coreml_update_state_125_write_state")]; tensor coreml_update_state_125 = read_state(input = value_cache)[name = string("coreml_update_state_125")]; string value_cache_internal_tensor_assign_27_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_27_dtype_0"), val = string("fp32")]; tensor var_5026_begin_0 = const()[name = string("op_5026_begin_0"), val = tensor([26, 0, 0, 0, 0])]; tensor var_5026_end_0 = const()[name = string("op_5026_end_0"), val = tensor([27, 1, 2, 2048, 128])]; tensor var_5026_end_mask_0 = const()[name = string("op_5026_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5026_squeeze_mask_0 = const()[name = string("op_5026_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_27_1 = cast(dtype = key_cache_internal_tensor_assign_27_dtype_0, x = coreml_update_state_124)[name = string("cast_38")]; tensor var_5026 = slice_by_index(begin = var_5026_begin_0, end = var_5026_end_0, end_mask = var_5026_end_mask_0, squeeze_mask = var_5026_squeeze_mask_0, x = key_cache_internal_tensor_assign_27_1)[name = string("op_5026")]; tensor var_5029_begin_0 = const()[name = string("op_5029_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5029_end_mask_0 = const()[name = string("op_5029_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5029 = slice_by_index(begin = var_5029_begin_0, end = concat_11, end_mask = var_5029_end_mask_0, x = var_5026)[name = string("op_5029")]; tensor var_5031_begin_0 = const()[name = string("op_5031_begin_0"), val = tensor([26, 0, 0, 0, 0])]; tensor var_5031_end_0 = const()[name = string("op_5031_end_0"), val = tensor([27, 1, 2, 2048, 128])]; tensor var_5031_end_mask_0 = const()[name = string("op_5031_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5031_squeeze_mask_0 = const()[name = string("op_5031_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_27_1 = cast(dtype = value_cache_internal_tensor_assign_27_dtype_0, x = coreml_update_state_125)[name = string("cast_37")]; tensor var_5031 = slice_by_index(begin = var_5031_begin_0, end = var_5031_end_0, end_mask = var_5031_end_mask_0, squeeze_mask = var_5031_squeeze_mask_0, x = value_cache_internal_tensor_assign_27_1)[name = string("op_5031")]; tensor var_5034_begin_0 = const()[name = string("op_5034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5034_end_mask_0 = const()[name = string("op_5034_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5034 = slice_by_index(begin = var_5034_begin_0, end = concat_11, end_mask = var_5034_end_mask_0, x = var_5031)[name = string("op_5034")]; tensor var_5036_shape = shape(x = var_5029)[name = string("op_5036_shape")]; int32 gather_481 = const()[name = string("gather_481"), val = int32(1)]; int32 gather_482 = const()[name = string("gather_482"), val = int32(2)]; int32 select_483 = const()[name = string("select_483"), val = int32(2)]; int32 gather_483_axis_0 = const()[name = string("gather_483_axis_0"), val = int32(0)]; int32 gather_483_batch_dims_0 = const()[name = string("gather_483_batch_dims_0"), val = int32(0)]; bool gather_483_validate_indices_0 = const()[name = string("gather_483_validate_indices_0"), val = bool(false)]; int32 gather_483 = gather(axis = gather_483_axis_0, batch_dims = gather_483_batch_dims_0, indices = select_483, validate_indices = gather_483_validate_indices_0, x = var_5036_shape)[name = string("gather_483")]; int32 gather_484 = const()[name = string("gather_484"), val = int32(128)]; tensor var_5043_axes_0 = const()[name = string("op_5043_axes_0"), val = tensor([2])]; tensor var_5043 = expand_dims(axes = var_5043_axes_0, x = var_5029)[name = string("op_5043")]; tensor shape_537 = shape(x = var_5043)[name = string("shape_537")]; int32 concat_507_axis_0 = const()[name = string("concat_507_axis_0"), val = int32(0)]; bool concat_507_interleave_0 = const()[name = string("concat_507_interleave_0"), val = bool(false)]; tensor concat_507 = concat(axis = concat_507_axis_0, interleave = concat_507_interleave_0, values = (gather_481, gather_482, var_100, gather_483, gather_484))[name = string("concat_507")]; tensor real_div_52 = real_div(x = concat_507, y = shape_537)[name = string("real_div_52")]; tensor hidden_states_791 = tile(reps = real_div_52, x = var_5043)[name = string("hidden_states_791")]; tensor concat_508x = const()[name = string("concat_508x"), val = tensor([1, 16, -1, 128])]; tensor key_states_107 = reshape(shape = concat_508x, x = hidden_states_791)[name = string("key_states_107")]; tensor var_5053_shape = shape(x = var_5034)[name = string("op_5053_shape")]; int32 gather_485 = const()[name = string("gather_485"), val = int32(1)]; int32 gather_486 = const()[name = string("gather_486"), val = int32(2)]; int32 select_487 = const()[name = string("select_487"), val = int32(2)]; int32 gather_487_axis_0 = const()[name = string("gather_487_axis_0"), val = int32(0)]; int32 gather_487_batch_dims_0 = const()[name = string("gather_487_batch_dims_0"), val = int32(0)]; bool gather_487_validate_indices_0 = const()[name = string("gather_487_validate_indices_0"), val = bool(false)]; int32 gather_487 = gather(axis = gather_487_axis_0, batch_dims = gather_487_batch_dims_0, indices = select_487, validate_indices = gather_487_validate_indices_0, x = var_5053_shape)[name = string("gather_487")]; int32 gather_488 = const()[name = string("gather_488"), val = int32(128)]; tensor var_5060_axes_0 = const()[name = string("op_5060_axes_0"), val = tensor([2])]; tensor var_5060 = expand_dims(axes = var_5060_axes_0, x = var_5034)[name = string("op_5060")]; tensor shape_542 = shape(x = var_5060)[name = string("shape_542")]; int32 concat_509_axis_0 = const()[name = string("concat_509_axis_0"), val = int32(0)]; bool concat_509_interleave_0 = const()[name = string("concat_509_interleave_0"), val = bool(false)]; tensor concat_509 = concat(axis = concat_509_axis_0, interleave = concat_509_interleave_0, values = (gather_485, gather_486, var_100, gather_487, gather_488))[name = string("concat_509")]; tensor real_div_53 = real_div(x = concat_509, y = shape_542)[name = string("real_div_53")]; tensor hidden_states_795 = tile(reps = real_div_53, x = var_5060)[name = string("hidden_states_795")]; tensor concat_510x = const()[name = string("concat_510x"), val = tensor([1, 16, -1, 128])]; tensor value_states_107 = reshape(shape = concat_510x, x = hidden_states_795)[name = string("value_states_107")]; tensor var_5070_shape = shape(x = key_states_107)[name = string("op_5070_shape")]; int32 select_489 = const()[name = string("select_489"), val = int32(2)]; int32 gather_489_axis_0 = const()[name = string("gather_489_axis_0"), val = int32(0)]; int32 gather_489_batch_dims_0 = const()[name = string("gather_489_batch_dims_0"), val = int32(0)]; bool gather_489_validate_indices_0 = const()[name = string("gather_489_validate_indices_0"), val = bool(false)]; int32 gather_489 = gather(axis = gather_489_axis_0, batch_dims = gather_489_batch_dims_0, indices = select_489, validate_indices = gather_489_validate_indices_0, x = var_5070_shape)[name = string("gather_489")]; int32 concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = int32(1)]; int32 concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = int32(1)]; int32 concat_511_values2_0 = const()[name = string("concat_511_values2_0"), val = int32(0)]; int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, concat_511_values2_0, gather_489))[name = string("concat_511")]; tensor causal_mask_55_begin_0 = const()[name = string("causal_mask_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_55_end_mask_0 = const()[name = string("causal_mask_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_55 = slice_by_index(begin = causal_mask_55_begin_0, end = concat_511, end_mask = causal_mask_55_end_mask_0, x = cast_0)[name = string("causal_mask_55")]; tensor attn_output_105 = scaled_dot_product_attention(attn_mask = causal_mask_55, key = key_states_107, query = query_states_107, value = value_states_107)[name = string("attn_output_105")]; tensor var_5076_perm_0 = const()[name = string("op_5076_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_512x = const()[name = string("concat_512x"), val = tensor([1, -1, 2048])]; tensor var_5076 = transpose(perm = var_5076_perm_0, x = attn_output_105)[name = string("transpose_36")]; tensor input_209 = reshape(shape = concat_512x, x = var_5076)[name = string("input_209")]; tensor linear_185 = linear(bias = linear_3_bias_0, weight = model_model_layers_26_self_attn_o_proj_weight_quantized, x = input_209)[name = string("linear_185")]; tensor hidden_states_799 = add(x = hidden_states_779, y = linear_185)[name = string("hidden_states_799")]; fp32 var_94_promoted_53 = const()[name = string("op_94_promoted_53"), val = fp32(0x1p+1)]; tensor var_5085 = pow(x = hidden_states_799, y = var_94_promoted_53)[name = string("op_5085")]; tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; tensor variance_107 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_5085)[name = string("variance_107")]; fp32 var_5088 = const()[name = string("op_5088"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5089 = add(x = variance_107, y = var_5088)[name = string("op_5089")]; fp32 var_5090_epsilon_0 = const()[name = string("op_5090_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5090 = rsqrt(epsilon = var_5090_epsilon_0, x = var_5089)[name = string("op_5090")]; tensor hidden_states_803 = mul(x = hidden_states_799, y = var_5090)[name = string("hidden_states_803")]; tensor input_211 = mul(x = model_model_layers_26_post_attention_layernorm_weight, y = hidden_states_803)[name = string("input_211")]; tensor linear_186 = linear(bias = linear_4_bias_0, weight = model_model_layers_26_mlp_gate_proj_weight_quantized, x = input_211)[name = string("linear_186")]; tensor var_5102 = silu(x = linear_186)[name = string("op_5102")]; tensor linear_187 = linear(bias = linear_4_bias_0, weight = model_model_layers_26_mlp_up_proj_weight_quantized, x = input_211)[name = string("linear_187")]; tensor input_215 = mul(x = var_5102, y = linear_187)[name = string("input_215")]; tensor linear_188 = linear(bias = linear_3_bias_0, weight = model_model_layers_26_mlp_down_proj_weight_quantized, x = input_215)[name = string("linear_188")]; tensor hidden_states_809 = add(x = hidden_states_799, y = linear_188)[name = string("hidden_states_809")]; fp32 var_94_promoted_54 = const()[name = string("op_94_promoted_54"), val = fp32(0x1p+1)]; tensor var_5115 = pow(x = hidden_states_809, y = var_94_promoted_54)[name = string("op_5115")]; tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; tensor variance_109 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_5115)[name = string("variance_109")]; fp32 var_5118 = const()[name = string("op_5118"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5119 = add(x = variance_109, y = var_5118)[name = string("op_5119")]; fp32 var_5120_epsilon_0 = const()[name = string("op_5120_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5120 = rsqrt(epsilon = var_5120_epsilon_0, x = var_5119)[name = string("op_5120")]; tensor hidden_states_813 = mul(x = hidden_states_809, y = var_5120)[name = string("hidden_states_813")]; tensor hidden_states_817 = mul(x = model_model_layers_27_input_layernorm_weight, y = hidden_states_813)[name = string("hidden_states_817")]; tensor linear_189 = linear(bias = model_model_layers_27_self_attn_q_proj_bias, weight = model_model_layers_27_self_attn_q_proj_weight_quantized, x = hidden_states_817)[name = string("linear_189")]; tensor linear_190 = linear(bias = model_model_layers_27_self_attn_k_proj_bias, weight = model_model_layers_27_self_attn_k_proj_weight_quantized, x = hidden_states_817)[name = string("linear_190")]; tensor linear_191 = linear(bias = model_model_layers_27_self_attn_v_proj_bias, weight = model_model_layers_27_self_attn_v_proj_weight_quantized, x = hidden_states_817)[name = string("linear_191")]; tensor concat_513x = const()[name = string("concat_513x"), val = tensor([1, -1, 16, 128])]; tensor var_5143 = reshape(shape = concat_513x, x = linear_189)[name = string("op_5143")]; tensor q_55_perm_0 = const()[name = string("q_55_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_514x = const()[name = string("concat_514x"), val = tensor([1, -1, 2, 128])]; tensor var_5146 = reshape(shape = concat_514x, x = linear_190)[name = string("op_5146")]; tensor k_55_perm_0 = const()[name = string("k_55_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_515x = const()[name = string("concat_515x"), val = tensor([1, -1, 2, 128])]; tensor var_5149 = reshape(shape = concat_515x, x = linear_191)[name = string("op_5149")]; tensor v_state_55_perm_0 = const()[name = string("v_state_55_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_55 = transpose(perm = q_55_perm_0, x = var_5143)[name = string("transpose_35")]; tensor var_5153 = mul(x = q_55, y = cos_7)[name = string("op_5153")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_55)[name = string("x1_109")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_55)[name = string("x2_109")]; fp32 const_57_promoted = const()[name = string("const_57_promoted"), val = fp32(-0x1p+0)]; tensor var_5164 = mul(x = x2_109, y = const_57_promoted)[name = string("op_5164")]; bool var_5166_interleave_0 = const()[name = string("op_5166_interleave_0"), val = bool(false)]; tensor var_5166 = concat(axis = var_88, interleave = var_5166_interleave_0, values = (var_5164, x1_109))[name = string("op_5166")]; tensor var_5167 = mul(x = var_5166, y = sin_7)[name = string("op_5167")]; tensor query_states_111 = add(x = var_5153, y = var_5167)[name = string("query_states_111")]; tensor k_55 = transpose(perm = k_55_perm_0, x = var_5146)[name = string("transpose_34")]; tensor var_5169 = mul(x = k_55, y = cos_7)[name = string("op_5169")]; tensor x1_111_begin_0 = const()[name = string("x1_111_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_111_end_0 = const()[name = string("x1_111_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_111_end_mask_0 = const()[name = string("x1_111_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_111 = slice_by_index(begin = x1_111_begin_0, end = x1_111_end_0, end_mask = x1_111_end_mask_0, x = k_55)[name = string("x1_111")]; tensor x2_111_begin_0 = const()[name = string("x2_111_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_111_end_0 = const()[name = string("x2_111_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_111_end_mask_0 = const()[name = string("x2_111_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_111 = slice_by_index(begin = x2_111_begin_0, end = x2_111_end_0, end_mask = x2_111_end_mask_0, x = k_55)[name = string("x2_111")]; fp32 const_58_promoted = const()[name = string("const_58_promoted"), val = fp32(-0x1p+0)]; tensor var_5180 = mul(x = x2_111, y = const_58_promoted)[name = string("op_5180")]; bool var_5182_interleave_0 = const()[name = string("op_5182_interleave_0"), val = bool(false)]; tensor var_5182 = concat(axis = var_88, interleave = var_5182_interleave_0, values = (var_5180, x1_111))[name = string("op_5182")]; tensor var_5183 = mul(x = var_5182, y = sin_7)[name = string("op_5183")]; tensor k_state_55 = add(x = var_5169, y = var_5183)[name = string("k_state_55")]; tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([0])]; tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; tensor concat_518_values0_0 = const()[name = string("concat_518_values0_0"), val = tensor([27])]; int32 concat_518_axis_0 = const()[name = string("concat_518_axis_0"), val = int32(0)]; bool concat_518_interleave_0 = const()[name = string("concat_518_interleave_0"), val = bool(false)]; tensor concat_518 = concat(axis = concat_518_axis_0, interleave = concat_518_interleave_0, values = (concat_518_values0_0, expand_dims_324, expand_dims_325, expand_dims_2, expand_dims_327))[name = string("concat_518")]; tensor key_cache_internal_tensor_assign_28_stride_0 = const()[name = string("key_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_28 = slice_update(begin = concat_518, begin_mask = key_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_28_squeeze_mask_0, stride = key_cache_internal_tensor_assign_28_stride_0, update = k_state_55, x = key_cache_internal_tensor_assign_27_1)[name = string("key_cache_internal_tensor_assign_28")]; string cast_391_dtype_0 = const()[name = string("cast_391_dtype_0"), val = string("fp16")]; tensor cast_391 = cast(dtype = cast_391_dtype_0, x = key_cache_internal_tensor_assign_28)[name = string("cast_36")]; write_state(data = cast_391, input = key_cache)[name = string("coreml_update_state_126_write_state")]; tensor coreml_update_state_126 = read_state(input = key_cache)[name = string("coreml_update_state_126")]; string key_cache_internal_tensor_assign_28_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_28_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_28_stride_0 = const()[name = string("value_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_55 = transpose(perm = v_state_55_perm_0, x = var_5149)[name = string("transpose_33")]; tensor value_cache_internal_tensor_assign_28 = slice_update(begin = concat_518, begin_mask = value_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_28_squeeze_mask_0, stride = value_cache_internal_tensor_assign_28_stride_0, update = v_state_55, x = value_cache_internal_tensor_assign_27_1)[name = string("value_cache_internal_tensor_assign_28")]; string cast_392_dtype_0 = const()[name = string("cast_392_dtype_0"), val = string("fp16")]; tensor cast_392 = cast(dtype = cast_392_dtype_0, x = value_cache_internal_tensor_assign_28)[name = string("cast_35")]; write_state(data = cast_392, input = value_cache)[name = string("coreml_update_state_127_write_state")]; tensor coreml_update_state_127 = read_state(input = value_cache)[name = string("coreml_update_state_127")]; string value_cache_internal_tensor_assign_28_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_28_dtype_0"), val = string("fp32")]; tensor var_5206_begin_0 = const()[name = string("op_5206_begin_0"), val = tensor([27, 0, 0, 0, 0])]; tensor var_5206_end_0 = const()[name = string("op_5206_end_0"), val = tensor([28, 1, 2, 2048, 128])]; tensor var_5206_end_mask_0 = const()[name = string("op_5206_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5206_squeeze_mask_0 = const()[name = string("op_5206_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_28_1 = cast(dtype = key_cache_internal_tensor_assign_28_dtype_0, x = coreml_update_state_126)[name = string("cast_34")]; tensor var_5206 = slice_by_index(begin = var_5206_begin_0, end = var_5206_end_0, end_mask = var_5206_end_mask_0, squeeze_mask = var_5206_squeeze_mask_0, x = key_cache_internal_tensor_assign_28_1)[name = string("op_5206")]; tensor var_5209_begin_0 = const()[name = string("op_5209_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5209_end_mask_0 = const()[name = string("op_5209_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5209 = slice_by_index(begin = var_5209_begin_0, end = concat_11, end_mask = var_5209_end_mask_0, x = var_5206)[name = string("op_5209")]; tensor var_5211_begin_0 = const()[name = string("op_5211_begin_0"), val = tensor([27, 0, 0, 0, 0])]; tensor var_5211_end_0 = const()[name = string("op_5211_end_0"), val = tensor([28, 1, 2, 2048, 128])]; tensor var_5211_end_mask_0 = const()[name = string("op_5211_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5211_squeeze_mask_0 = const()[name = string("op_5211_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_28_1 = cast(dtype = value_cache_internal_tensor_assign_28_dtype_0, x = coreml_update_state_127)[name = string("cast_33")]; tensor var_5211 = slice_by_index(begin = var_5211_begin_0, end = var_5211_end_0, end_mask = var_5211_end_mask_0, squeeze_mask = var_5211_squeeze_mask_0, x = value_cache_internal_tensor_assign_28_1)[name = string("op_5211")]; tensor var_5214_begin_0 = const()[name = string("op_5214_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5214_end_mask_0 = const()[name = string("op_5214_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5214 = slice_by_index(begin = var_5214_begin_0, end = concat_11, end_mask = var_5214_end_mask_0, x = var_5211)[name = string("op_5214")]; tensor var_5216_shape = shape(x = var_5209)[name = string("op_5216_shape")]; int32 gather_499 = const()[name = string("gather_499"), val = int32(1)]; int32 gather_500 = const()[name = string("gather_500"), val = int32(2)]; int32 select_501 = const()[name = string("select_501"), val = int32(2)]; int32 gather_501_axis_0 = const()[name = string("gather_501_axis_0"), val = int32(0)]; int32 gather_501_batch_dims_0 = const()[name = string("gather_501_batch_dims_0"), val = int32(0)]; bool gather_501_validate_indices_0 = const()[name = string("gather_501_validate_indices_0"), val = bool(false)]; int32 gather_501 = gather(axis = gather_501_axis_0, batch_dims = gather_501_batch_dims_0, indices = select_501, validate_indices = gather_501_validate_indices_0, x = var_5216_shape)[name = string("gather_501")]; int32 gather_502 = const()[name = string("gather_502"), val = int32(128)]; tensor var_5223_axes_0 = const()[name = string("op_5223_axes_0"), val = tensor([2])]; tensor var_5223 = expand_dims(axes = var_5223_axes_0, x = var_5209)[name = string("op_5223")]; tensor shape_557 = shape(x = var_5223)[name = string("shape_557")]; int32 concat_526_axis_0 = const()[name = string("concat_526_axis_0"), val = int32(0)]; bool concat_526_interleave_0 = const()[name = string("concat_526_interleave_0"), val = bool(false)]; tensor concat_526 = concat(axis = concat_526_axis_0, interleave = concat_526_interleave_0, values = (gather_499, gather_500, var_100, gather_501, gather_502))[name = string("concat_526")]; tensor real_div_54 = real_div(x = concat_526, y = shape_557)[name = string("real_div_54")]; tensor hidden_states_821 = tile(reps = real_div_54, x = var_5223)[name = string("hidden_states_821")]; tensor concat_527x = const()[name = string("concat_527x"), val = tensor([1, 16, -1, 128])]; tensor key_states_111 = reshape(shape = concat_527x, x = hidden_states_821)[name = string("key_states_111")]; tensor var_5233_shape = shape(x = var_5214)[name = string("op_5233_shape")]; int32 gather_503 = const()[name = string("gather_503"), val = int32(1)]; int32 gather_504 = const()[name = string("gather_504"), val = int32(2)]; int32 select_505 = const()[name = string("select_505"), val = int32(2)]; int32 gather_505_axis_0 = const()[name = string("gather_505_axis_0"), val = int32(0)]; int32 gather_505_batch_dims_0 = const()[name = string("gather_505_batch_dims_0"), val = int32(0)]; bool gather_505_validate_indices_0 = const()[name = string("gather_505_validate_indices_0"), val = bool(false)]; int32 gather_505 = gather(axis = gather_505_axis_0, batch_dims = gather_505_batch_dims_0, indices = select_505, validate_indices = gather_505_validate_indices_0, x = var_5233_shape)[name = string("gather_505")]; int32 gather_506 = const()[name = string("gather_506"), val = int32(128)]; tensor var_5240_axes_0 = const()[name = string("op_5240_axes_0"), val = tensor([2])]; tensor var_5240 = expand_dims(axes = var_5240_axes_0, x = var_5214)[name = string("op_5240")]; tensor shape_562 = shape(x = var_5240)[name = string("shape_562")]; int32 concat_528_axis_0 = const()[name = string("concat_528_axis_0"), val = int32(0)]; bool concat_528_interleave_0 = const()[name = string("concat_528_interleave_0"), val = bool(false)]; tensor concat_528 = concat(axis = concat_528_axis_0, interleave = concat_528_interleave_0, values = (gather_503, gather_504, var_100, gather_505, gather_506))[name = string("concat_528")]; tensor real_div_55 = real_div(x = concat_528, y = shape_562)[name = string("real_div_55")]; tensor hidden_states_825 = tile(reps = real_div_55, x = var_5240)[name = string("hidden_states_825")]; tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, 16, -1, 128])]; tensor value_states_111 = reshape(shape = concat_529x, x = hidden_states_825)[name = string("value_states_111")]; tensor var_5250_shape = shape(x = key_states_111)[name = string("op_5250_shape")]; int32 select_507 = const()[name = string("select_507"), val = int32(2)]; int32 gather_507_axis_0 = const()[name = string("gather_507_axis_0"), val = int32(0)]; int32 gather_507_batch_dims_0 = const()[name = string("gather_507_batch_dims_0"), val = int32(0)]; bool gather_507_validate_indices_0 = const()[name = string("gather_507_validate_indices_0"), val = bool(false)]; int32 gather_507 = gather(axis = gather_507_axis_0, batch_dims = gather_507_batch_dims_0, indices = select_507, validate_indices = gather_507_validate_indices_0, x = var_5250_shape)[name = string("gather_507")]; int32 concat_530_values0_0 = const()[name = string("concat_530_values0_0"), val = int32(1)]; int32 concat_530_values1_0 = const()[name = string("concat_530_values1_0"), val = int32(1)]; int32 concat_530_values2_0 = const()[name = string("concat_530_values2_0"), val = int32(0)]; int32 concat_530_axis_0 = const()[name = string("concat_530_axis_0"), val = int32(0)]; bool concat_530_interleave_0 = const()[name = string("concat_530_interleave_0"), val = bool(false)]; tensor concat_530 = concat(axis = concat_530_axis_0, interleave = concat_530_interleave_0, values = (concat_530_values0_0, concat_530_values1_0, concat_530_values2_0, gather_507))[name = string("concat_530")]; tensor causal_mask_57_begin_0 = const()[name = string("causal_mask_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_57_end_mask_0 = const()[name = string("causal_mask_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_57 = slice_by_index(begin = causal_mask_57_begin_0, end = concat_530, end_mask = causal_mask_57_end_mask_0, x = cast_0)[name = string("causal_mask_57")]; tensor attn_output_109 = scaled_dot_product_attention(attn_mask = causal_mask_57, key = key_states_111, query = query_states_111, value = value_states_111)[name = string("attn_output_109")]; tensor var_5256_perm_0 = const()[name = string("op_5256_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_531x = const()[name = string("concat_531x"), val = tensor([1, -1, 2048])]; tensor var_5256 = transpose(perm = var_5256_perm_0, x = attn_output_109)[name = string("transpose_32")]; tensor input_217 = reshape(shape = concat_531x, x = var_5256)[name = string("input_217")]; tensor linear_192 = linear(bias = linear_3_bias_0, weight = model_model_layers_27_self_attn_o_proj_weight_quantized, x = input_217)[name = string("linear_192")]; tensor hidden_states_829 = add(x = hidden_states_809, y = linear_192)[name = string("hidden_states_829")]; fp32 var_94_promoted_55 = const()[name = string("op_94_promoted_55"), val = fp32(0x1p+1)]; tensor var_5265 = pow(x = hidden_states_829, y = var_94_promoted_55)[name = string("op_5265")]; tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; tensor variance_111 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_5265)[name = string("variance_111")]; fp32 var_5268 = const()[name = string("op_5268"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5269 = add(x = variance_111, y = var_5268)[name = string("op_5269")]; fp32 var_5270_epsilon_0 = const()[name = string("op_5270_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5270 = rsqrt(epsilon = var_5270_epsilon_0, x = var_5269)[name = string("op_5270")]; tensor hidden_states_833 = mul(x = hidden_states_829, y = var_5270)[name = string("hidden_states_833")]; tensor input_219 = mul(x = model_model_layers_27_post_attention_layernorm_weight, y = hidden_states_833)[name = string("input_219")]; tensor linear_193 = linear(bias = linear_4_bias_0, weight = model_model_layers_27_mlp_gate_proj_weight_quantized, x = input_219)[name = string("linear_193")]; tensor var_5282 = silu(x = linear_193)[name = string("op_5282")]; tensor linear_194 = linear(bias = linear_4_bias_0, weight = model_model_layers_27_mlp_up_proj_weight_quantized, x = input_219)[name = string("linear_194")]; tensor input_223 = mul(x = var_5282, y = linear_194)[name = string("input_223")]; tensor linear_195 = linear(bias = linear_3_bias_0, weight = model_model_layers_27_mlp_down_proj_weight_quantized, x = input_223)[name = string("linear_195")]; tensor hidden_states_839 = add(x = hidden_states_829, y = linear_195)[name = string("hidden_states_839")]; fp32 var_94_promoted_56 = const()[name = string("op_94_promoted_56"), val = fp32(0x1p+1)]; tensor var_5295 = pow(x = hidden_states_839, y = var_94_promoted_56)[name = string("op_5295")]; tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; tensor variance_113 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_5295)[name = string("variance_113")]; fp32 var_5298 = const()[name = string("op_5298"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5299 = add(x = variance_113, y = var_5298)[name = string("op_5299")]; fp32 var_5300_epsilon_0 = const()[name = string("op_5300_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5300 = rsqrt(epsilon = var_5300_epsilon_0, x = var_5299)[name = string("op_5300")]; tensor hidden_states_843 = mul(x = hidden_states_839, y = var_5300)[name = string("hidden_states_843")]; tensor hidden_states_847 = mul(x = model_model_layers_28_input_layernorm_weight, y = hidden_states_843)[name = string("hidden_states_847")]; tensor linear_196 = linear(bias = model_model_layers_28_self_attn_q_proj_bias, weight = model_model_layers_28_self_attn_q_proj_weight_quantized, x = hidden_states_847)[name = string("linear_196")]; tensor linear_197 = linear(bias = model_model_layers_28_self_attn_k_proj_bias, weight = model_model_layers_28_self_attn_k_proj_weight_quantized, x = hidden_states_847)[name = string("linear_197")]; tensor linear_198 = linear(bias = model_model_layers_28_self_attn_v_proj_bias, weight = model_model_layers_28_self_attn_v_proj_weight_quantized, x = hidden_states_847)[name = string("linear_198")]; tensor concat_532x = const()[name = string("concat_532x"), val = tensor([1, -1, 16, 128])]; tensor var_5323 = reshape(shape = concat_532x, x = linear_196)[name = string("op_5323")]; tensor q_57_perm_0 = const()[name = string("q_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_533x = const()[name = string("concat_533x"), val = tensor([1, -1, 2, 128])]; tensor var_5326 = reshape(shape = concat_533x, x = linear_197)[name = string("op_5326")]; tensor k_57_perm_0 = const()[name = string("k_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_534x = const()[name = string("concat_534x"), val = tensor([1, -1, 2, 128])]; tensor var_5329 = reshape(shape = concat_534x, x = linear_198)[name = string("op_5329")]; tensor v_state_57_perm_0 = const()[name = string("v_state_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_57 = transpose(perm = q_57_perm_0, x = var_5323)[name = string("transpose_31")]; tensor var_5333 = mul(x = q_57, y = cos_7)[name = string("op_5333")]; tensor x1_113_begin_0 = const()[name = string("x1_113_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_113_end_0 = const()[name = string("x1_113_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_113_end_mask_0 = const()[name = string("x1_113_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_113 = slice_by_index(begin = x1_113_begin_0, end = x1_113_end_0, end_mask = x1_113_end_mask_0, x = q_57)[name = string("x1_113")]; tensor x2_113_begin_0 = const()[name = string("x2_113_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_113_end_0 = const()[name = string("x2_113_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_113_end_mask_0 = const()[name = string("x2_113_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_113 = slice_by_index(begin = x2_113_begin_0, end = x2_113_end_0, end_mask = x2_113_end_mask_0, x = q_57)[name = string("x2_113")]; fp32 const_59_promoted = const()[name = string("const_59_promoted"), val = fp32(-0x1p+0)]; tensor var_5344 = mul(x = x2_113, y = const_59_promoted)[name = string("op_5344")]; bool var_5346_interleave_0 = const()[name = string("op_5346_interleave_0"), val = bool(false)]; tensor var_5346 = concat(axis = var_88, interleave = var_5346_interleave_0, values = (var_5344, x1_113))[name = string("op_5346")]; tensor var_5347 = mul(x = var_5346, y = sin_7)[name = string("op_5347")]; tensor query_states_115 = add(x = var_5333, y = var_5347)[name = string("query_states_115")]; tensor k_57 = transpose(perm = k_57_perm_0, x = var_5326)[name = string("transpose_30")]; tensor var_5349 = mul(x = k_57, y = cos_7)[name = string("op_5349")]; tensor x1_115_begin_0 = const()[name = string("x1_115_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_115_end_0 = const()[name = string("x1_115_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_115_end_mask_0 = const()[name = string("x1_115_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_115 = slice_by_index(begin = x1_115_begin_0, end = x1_115_end_0, end_mask = x1_115_end_mask_0, x = k_57)[name = string("x1_115")]; tensor x2_115_begin_0 = const()[name = string("x2_115_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_115_end_0 = const()[name = string("x2_115_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_115_end_mask_0 = const()[name = string("x2_115_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_115 = slice_by_index(begin = x2_115_begin_0, end = x2_115_end_0, end_mask = x2_115_end_mask_0, x = k_57)[name = string("x2_115")]; fp32 const_60_promoted = const()[name = string("const_60_promoted"), val = fp32(-0x1p+0)]; tensor var_5360 = mul(x = x2_115, y = const_60_promoted)[name = string("op_5360")]; bool var_5362_interleave_0 = const()[name = string("op_5362_interleave_0"), val = bool(false)]; tensor var_5362 = concat(axis = var_88, interleave = var_5362_interleave_0, values = (var_5360, x1_115))[name = string("op_5362")]; tensor var_5363 = mul(x = var_5362, y = sin_7)[name = string("op_5363")]; tensor k_state_57 = add(x = var_5349, y = var_5363)[name = string("k_state_57")]; tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; tensor expand_dims_337 = const()[name = string("expand_dims_337"), val = tensor([0])]; tensor expand_dims_339 = const()[name = string("expand_dims_339"), val = tensor([0])]; tensor concat_537_values0_0 = const()[name = string("concat_537_values0_0"), val = tensor([28])]; int32 concat_537_axis_0 = const()[name = string("concat_537_axis_0"), val = int32(0)]; bool concat_537_interleave_0 = const()[name = string("concat_537_interleave_0"), val = bool(false)]; tensor concat_537 = concat(axis = concat_537_axis_0, interleave = concat_537_interleave_0, values = (concat_537_values0_0, expand_dims_336, expand_dims_337, expand_dims_2, expand_dims_339))[name = string("concat_537")]; tensor key_cache_internal_tensor_assign_29_stride_0 = const()[name = string("key_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_29 = slice_update(begin = concat_537, begin_mask = key_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_29_squeeze_mask_0, stride = key_cache_internal_tensor_assign_29_stride_0, update = k_state_57, x = key_cache_internal_tensor_assign_28_1)[name = string("key_cache_internal_tensor_assign_29")]; string cast_405_dtype_0 = const()[name = string("cast_405_dtype_0"), val = string("fp16")]; tensor cast_405 = cast(dtype = cast_405_dtype_0, x = key_cache_internal_tensor_assign_29)[name = string("cast_32")]; write_state(data = cast_405, input = key_cache)[name = string("coreml_update_state_128_write_state")]; tensor coreml_update_state_128 = read_state(input = key_cache)[name = string("coreml_update_state_128")]; string key_cache_internal_tensor_assign_29_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_29_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_29_stride_0 = const()[name = string("value_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_57 = transpose(perm = v_state_57_perm_0, x = var_5329)[name = string("transpose_29")]; tensor value_cache_internal_tensor_assign_29 = slice_update(begin = concat_537, begin_mask = value_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_29_squeeze_mask_0, stride = value_cache_internal_tensor_assign_29_stride_0, update = v_state_57, x = value_cache_internal_tensor_assign_28_1)[name = string("value_cache_internal_tensor_assign_29")]; string cast_406_dtype_0 = const()[name = string("cast_406_dtype_0"), val = string("fp16")]; tensor cast_406 = cast(dtype = cast_406_dtype_0, x = value_cache_internal_tensor_assign_29)[name = string("cast_31")]; write_state(data = cast_406, input = value_cache)[name = string("coreml_update_state_129_write_state")]; tensor coreml_update_state_129 = read_state(input = value_cache)[name = string("coreml_update_state_129")]; string value_cache_internal_tensor_assign_29_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_29_dtype_0"), val = string("fp32")]; tensor var_5386_begin_0 = const()[name = string("op_5386_begin_0"), val = tensor([28, 0, 0, 0, 0])]; tensor var_5386_end_0 = const()[name = string("op_5386_end_0"), val = tensor([29, 1, 2, 2048, 128])]; tensor var_5386_end_mask_0 = const()[name = string("op_5386_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5386_squeeze_mask_0 = const()[name = string("op_5386_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_29_1 = cast(dtype = key_cache_internal_tensor_assign_29_dtype_0, x = coreml_update_state_128)[name = string("cast_30")]; tensor var_5386 = slice_by_index(begin = var_5386_begin_0, end = var_5386_end_0, end_mask = var_5386_end_mask_0, squeeze_mask = var_5386_squeeze_mask_0, x = key_cache_internal_tensor_assign_29_1)[name = string("op_5386")]; tensor var_5389_begin_0 = const()[name = string("op_5389_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5389_end_mask_0 = const()[name = string("op_5389_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5389 = slice_by_index(begin = var_5389_begin_0, end = concat_11, end_mask = var_5389_end_mask_0, x = var_5386)[name = string("op_5389")]; tensor var_5391_begin_0 = const()[name = string("op_5391_begin_0"), val = tensor([28, 0, 0, 0, 0])]; tensor var_5391_end_0 = const()[name = string("op_5391_end_0"), val = tensor([29, 1, 2, 2048, 128])]; tensor var_5391_end_mask_0 = const()[name = string("op_5391_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5391_squeeze_mask_0 = const()[name = string("op_5391_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_29_1 = cast(dtype = value_cache_internal_tensor_assign_29_dtype_0, x = coreml_update_state_129)[name = string("cast_29")]; tensor var_5391 = slice_by_index(begin = var_5391_begin_0, end = var_5391_end_0, end_mask = var_5391_end_mask_0, squeeze_mask = var_5391_squeeze_mask_0, x = value_cache_internal_tensor_assign_29_1)[name = string("op_5391")]; tensor var_5394_begin_0 = const()[name = string("op_5394_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5394_end_mask_0 = const()[name = string("op_5394_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5394 = slice_by_index(begin = var_5394_begin_0, end = concat_11, end_mask = var_5394_end_mask_0, x = var_5391)[name = string("op_5394")]; tensor var_5396_shape = shape(x = var_5389)[name = string("op_5396_shape")]; int32 gather_517 = const()[name = string("gather_517"), val = int32(1)]; int32 gather_518 = const()[name = string("gather_518"), val = int32(2)]; int32 select_519 = const()[name = string("select_519"), val = int32(2)]; int32 gather_519_axis_0 = const()[name = string("gather_519_axis_0"), val = int32(0)]; int32 gather_519_batch_dims_0 = const()[name = string("gather_519_batch_dims_0"), val = int32(0)]; bool gather_519_validate_indices_0 = const()[name = string("gather_519_validate_indices_0"), val = bool(false)]; int32 gather_519 = gather(axis = gather_519_axis_0, batch_dims = gather_519_batch_dims_0, indices = select_519, validate_indices = gather_519_validate_indices_0, x = var_5396_shape)[name = string("gather_519")]; int32 gather_520 = const()[name = string("gather_520"), val = int32(128)]; tensor var_5403_axes_0 = const()[name = string("op_5403_axes_0"), val = tensor([2])]; tensor var_5403 = expand_dims(axes = var_5403_axes_0, x = var_5389)[name = string("op_5403")]; tensor shape_577 = shape(x = var_5403)[name = string("shape_577")]; int32 concat_545_axis_0 = const()[name = string("concat_545_axis_0"), val = int32(0)]; bool concat_545_interleave_0 = const()[name = string("concat_545_interleave_0"), val = bool(false)]; tensor concat_545 = concat(axis = concat_545_axis_0, interleave = concat_545_interleave_0, values = (gather_517, gather_518, var_100, gather_519, gather_520))[name = string("concat_545")]; tensor real_div_56 = real_div(x = concat_545, y = shape_577)[name = string("real_div_56")]; tensor hidden_states_851 = tile(reps = real_div_56, x = var_5403)[name = string("hidden_states_851")]; tensor concat_546x = const()[name = string("concat_546x"), val = tensor([1, 16, -1, 128])]; tensor key_states_115 = reshape(shape = concat_546x, x = hidden_states_851)[name = string("key_states_115")]; tensor var_5413_shape = shape(x = var_5394)[name = string("op_5413_shape")]; int32 gather_521 = const()[name = string("gather_521"), val = int32(1)]; int32 gather_522 = const()[name = string("gather_522"), val = int32(2)]; int32 select_523 = const()[name = string("select_523"), val = int32(2)]; int32 gather_523_axis_0 = const()[name = string("gather_523_axis_0"), val = int32(0)]; int32 gather_523_batch_dims_0 = const()[name = string("gather_523_batch_dims_0"), val = int32(0)]; bool gather_523_validate_indices_0 = const()[name = string("gather_523_validate_indices_0"), val = bool(false)]; int32 gather_523 = gather(axis = gather_523_axis_0, batch_dims = gather_523_batch_dims_0, indices = select_523, validate_indices = gather_523_validate_indices_0, x = var_5413_shape)[name = string("gather_523")]; int32 gather_524 = const()[name = string("gather_524"), val = int32(128)]; tensor var_5420_axes_0 = const()[name = string("op_5420_axes_0"), val = tensor([2])]; tensor var_5420 = expand_dims(axes = var_5420_axes_0, x = var_5394)[name = string("op_5420")]; tensor shape_582 = shape(x = var_5420)[name = string("shape_582")]; int32 concat_547_axis_0 = const()[name = string("concat_547_axis_0"), val = int32(0)]; bool concat_547_interleave_0 = const()[name = string("concat_547_interleave_0"), val = bool(false)]; tensor concat_547 = concat(axis = concat_547_axis_0, interleave = concat_547_interleave_0, values = (gather_521, gather_522, var_100, gather_523, gather_524))[name = string("concat_547")]; tensor real_div_57 = real_div(x = concat_547, y = shape_582)[name = string("real_div_57")]; tensor hidden_states_855 = tile(reps = real_div_57, x = var_5420)[name = string("hidden_states_855")]; tensor concat_548x = const()[name = string("concat_548x"), val = tensor([1, 16, -1, 128])]; tensor value_states_115 = reshape(shape = concat_548x, x = hidden_states_855)[name = string("value_states_115")]; tensor var_5430_shape = shape(x = key_states_115)[name = string("op_5430_shape")]; int32 select_525 = const()[name = string("select_525"), val = int32(2)]; int32 gather_525_axis_0 = const()[name = string("gather_525_axis_0"), val = int32(0)]; int32 gather_525_batch_dims_0 = const()[name = string("gather_525_batch_dims_0"), val = int32(0)]; bool gather_525_validate_indices_0 = const()[name = string("gather_525_validate_indices_0"), val = bool(false)]; int32 gather_525 = gather(axis = gather_525_axis_0, batch_dims = gather_525_batch_dims_0, indices = select_525, validate_indices = gather_525_validate_indices_0, x = var_5430_shape)[name = string("gather_525")]; int32 concat_549_values0_0 = const()[name = string("concat_549_values0_0"), val = int32(1)]; int32 concat_549_values1_0 = const()[name = string("concat_549_values1_0"), val = int32(1)]; int32 concat_549_values2_0 = const()[name = string("concat_549_values2_0"), val = int32(0)]; int32 concat_549_axis_0 = const()[name = string("concat_549_axis_0"), val = int32(0)]; bool concat_549_interleave_0 = const()[name = string("concat_549_interleave_0"), val = bool(false)]; tensor concat_549 = concat(axis = concat_549_axis_0, interleave = concat_549_interleave_0, values = (concat_549_values0_0, concat_549_values1_0, concat_549_values2_0, gather_525))[name = string("concat_549")]; tensor causal_mask_59_begin_0 = const()[name = string("causal_mask_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_59_end_mask_0 = const()[name = string("causal_mask_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_59 = slice_by_index(begin = causal_mask_59_begin_0, end = concat_549, end_mask = causal_mask_59_end_mask_0, x = cast_0)[name = string("causal_mask_59")]; tensor attn_output_113 = scaled_dot_product_attention(attn_mask = causal_mask_59, key = key_states_115, query = query_states_115, value = value_states_115)[name = string("attn_output_113")]; tensor var_5436_perm_0 = const()[name = string("op_5436_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_550x = const()[name = string("concat_550x"), val = tensor([1, -1, 2048])]; tensor var_5436 = transpose(perm = var_5436_perm_0, x = attn_output_113)[name = string("transpose_28")]; tensor input_225 = reshape(shape = concat_550x, x = var_5436)[name = string("input_225")]; tensor linear_199 = linear(bias = linear_3_bias_0, weight = model_model_layers_28_self_attn_o_proj_weight_quantized, x = input_225)[name = string("linear_199")]; tensor hidden_states_859 = add(x = hidden_states_839, y = linear_199)[name = string("hidden_states_859")]; fp32 var_94_promoted_57 = const()[name = string("op_94_promoted_57"), val = fp32(0x1p+1)]; tensor var_5445 = pow(x = hidden_states_859, y = var_94_promoted_57)[name = string("op_5445")]; tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; tensor variance_115 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_5445)[name = string("variance_115")]; fp32 var_5448 = const()[name = string("op_5448"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5449 = add(x = variance_115, y = var_5448)[name = string("op_5449")]; fp32 var_5450_epsilon_0 = const()[name = string("op_5450_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5450 = rsqrt(epsilon = var_5450_epsilon_0, x = var_5449)[name = string("op_5450")]; tensor hidden_states_863 = mul(x = hidden_states_859, y = var_5450)[name = string("hidden_states_863")]; tensor input_227 = mul(x = model_model_layers_28_post_attention_layernorm_weight, y = hidden_states_863)[name = string("input_227")]; tensor linear_200 = linear(bias = linear_4_bias_0, weight = model_model_layers_28_mlp_gate_proj_weight_quantized, x = input_227)[name = string("linear_200")]; tensor var_5462 = silu(x = linear_200)[name = string("op_5462")]; tensor linear_201 = linear(bias = linear_4_bias_0, weight = model_model_layers_28_mlp_up_proj_weight_quantized, x = input_227)[name = string("linear_201")]; tensor input_231 = mul(x = var_5462, y = linear_201)[name = string("input_231")]; tensor linear_202 = linear(bias = linear_3_bias_0, weight = model_model_layers_28_mlp_down_proj_weight_quantized, x = input_231)[name = string("linear_202")]; tensor hidden_states_869 = add(x = hidden_states_859, y = linear_202)[name = string("hidden_states_869")]; fp32 var_94_promoted_58 = const()[name = string("op_94_promoted_58"), val = fp32(0x1p+1)]; tensor var_5475 = pow(x = hidden_states_869, y = var_94_promoted_58)[name = string("op_5475")]; tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; tensor variance_117 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_5475)[name = string("variance_117")]; fp32 var_5478 = const()[name = string("op_5478"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5479 = add(x = variance_117, y = var_5478)[name = string("op_5479")]; fp32 var_5480_epsilon_0 = const()[name = string("op_5480_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5480 = rsqrt(epsilon = var_5480_epsilon_0, x = var_5479)[name = string("op_5480")]; tensor hidden_states_873 = mul(x = hidden_states_869, y = var_5480)[name = string("hidden_states_873")]; tensor hidden_states_877 = mul(x = model_model_layers_29_input_layernorm_weight, y = hidden_states_873)[name = string("hidden_states_877")]; tensor linear_203 = linear(bias = model_model_layers_29_self_attn_q_proj_bias, weight = model_model_layers_29_self_attn_q_proj_weight_quantized, x = hidden_states_877)[name = string("linear_203")]; tensor linear_204 = linear(bias = model_model_layers_29_self_attn_k_proj_bias, weight = model_model_layers_29_self_attn_k_proj_weight_quantized, x = hidden_states_877)[name = string("linear_204")]; tensor linear_205 = linear(bias = model_model_layers_29_self_attn_v_proj_bias, weight = model_model_layers_29_self_attn_v_proj_weight_quantized, x = hidden_states_877)[name = string("linear_205")]; tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 16, 128])]; tensor var_5503 = reshape(shape = concat_551x, x = linear_203)[name = string("op_5503")]; tensor q_59_perm_0 = const()[name = string("q_59_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_552x = const()[name = string("concat_552x"), val = tensor([1, -1, 2, 128])]; tensor var_5506 = reshape(shape = concat_552x, x = linear_204)[name = string("op_5506")]; tensor k_59_perm_0 = const()[name = string("k_59_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_553x = const()[name = string("concat_553x"), val = tensor([1, -1, 2, 128])]; tensor var_5509 = reshape(shape = concat_553x, x = linear_205)[name = string("op_5509")]; tensor v_state_59_perm_0 = const()[name = string("v_state_59_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_59 = transpose(perm = q_59_perm_0, x = var_5503)[name = string("transpose_27")]; tensor var_5513 = mul(x = q_59, y = cos_7)[name = string("op_5513")]; tensor x1_117_begin_0 = const()[name = string("x1_117_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_117_end_0 = const()[name = string("x1_117_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_117_end_mask_0 = const()[name = string("x1_117_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_117 = slice_by_index(begin = x1_117_begin_0, end = x1_117_end_0, end_mask = x1_117_end_mask_0, x = q_59)[name = string("x1_117")]; tensor x2_117_begin_0 = const()[name = string("x2_117_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_117_end_0 = const()[name = string("x2_117_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_117_end_mask_0 = const()[name = string("x2_117_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_117 = slice_by_index(begin = x2_117_begin_0, end = x2_117_end_0, end_mask = x2_117_end_mask_0, x = q_59)[name = string("x2_117")]; fp32 const_61_promoted = const()[name = string("const_61_promoted"), val = fp32(-0x1p+0)]; tensor var_5524 = mul(x = x2_117, y = const_61_promoted)[name = string("op_5524")]; bool var_5526_interleave_0 = const()[name = string("op_5526_interleave_0"), val = bool(false)]; tensor var_5526 = concat(axis = var_88, interleave = var_5526_interleave_0, values = (var_5524, x1_117))[name = string("op_5526")]; tensor var_5527 = mul(x = var_5526, y = sin_7)[name = string("op_5527")]; tensor query_states_119 = add(x = var_5513, y = var_5527)[name = string("query_states_119")]; tensor k_59 = transpose(perm = k_59_perm_0, x = var_5506)[name = string("transpose_26")]; tensor var_5529 = mul(x = k_59, y = cos_7)[name = string("op_5529")]; tensor x1_119_begin_0 = const()[name = string("x1_119_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_119_end_0 = const()[name = string("x1_119_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_119_end_mask_0 = const()[name = string("x1_119_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_119 = slice_by_index(begin = x1_119_begin_0, end = x1_119_end_0, end_mask = x1_119_end_mask_0, x = k_59)[name = string("x1_119")]; tensor x2_119_begin_0 = const()[name = string("x2_119_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_119_end_0 = const()[name = string("x2_119_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_119_end_mask_0 = const()[name = string("x2_119_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_119 = slice_by_index(begin = x2_119_begin_0, end = x2_119_end_0, end_mask = x2_119_end_mask_0, x = k_59)[name = string("x2_119")]; fp32 const_62_promoted = const()[name = string("const_62_promoted"), val = fp32(-0x1p+0)]; tensor var_5540 = mul(x = x2_119, y = const_62_promoted)[name = string("op_5540")]; bool var_5542_interleave_0 = const()[name = string("op_5542_interleave_0"), val = bool(false)]; tensor var_5542 = concat(axis = var_88, interleave = var_5542_interleave_0, values = (var_5540, x1_119))[name = string("op_5542")]; tensor var_5543 = mul(x = var_5542, y = sin_7)[name = string("op_5543")]; tensor k_state_59 = add(x = var_5529, y = var_5543)[name = string("k_state_59")]; tensor expand_dims_348 = const()[name = string("expand_dims_348"), val = tensor([0])]; tensor expand_dims_349 = const()[name = string("expand_dims_349"), val = tensor([0])]; tensor expand_dims_351 = const()[name = string("expand_dims_351"), val = tensor([0])]; tensor concat_556_values0_0 = const()[name = string("concat_556_values0_0"), val = tensor([29])]; int32 concat_556_axis_0 = const()[name = string("concat_556_axis_0"), val = int32(0)]; bool concat_556_interleave_0 = const()[name = string("concat_556_interleave_0"), val = bool(false)]; tensor concat_556 = concat(axis = concat_556_axis_0, interleave = concat_556_interleave_0, values = (concat_556_values0_0, expand_dims_348, expand_dims_349, expand_dims_2, expand_dims_351))[name = string("concat_556")]; tensor key_cache_internal_tensor_assign_30_stride_0 = const()[name = string("key_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_30 = slice_update(begin = concat_556, begin_mask = key_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_30_squeeze_mask_0, stride = key_cache_internal_tensor_assign_30_stride_0, update = k_state_59, x = key_cache_internal_tensor_assign_29_1)[name = string("key_cache_internal_tensor_assign_30")]; string cast_419_dtype_0 = const()[name = string("cast_419_dtype_0"), val = string("fp16")]; tensor cast_419 = cast(dtype = cast_419_dtype_0, x = key_cache_internal_tensor_assign_30)[name = string("cast_28")]; write_state(data = cast_419, input = key_cache)[name = string("coreml_update_state_130_write_state")]; tensor coreml_update_state_130 = read_state(input = key_cache)[name = string("coreml_update_state_130")]; string key_cache_internal_tensor_assign_30_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_30_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_30_stride_0 = const()[name = string("value_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_59 = transpose(perm = v_state_59_perm_0, x = var_5509)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_30 = slice_update(begin = concat_556, begin_mask = value_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_30_squeeze_mask_0, stride = value_cache_internal_tensor_assign_30_stride_0, update = v_state_59, x = value_cache_internal_tensor_assign_29_1)[name = string("value_cache_internal_tensor_assign_30")]; string cast_420_dtype_0 = const()[name = string("cast_420_dtype_0"), val = string("fp16")]; tensor cast_420 = cast(dtype = cast_420_dtype_0, x = value_cache_internal_tensor_assign_30)[name = string("cast_27")]; write_state(data = cast_420, input = value_cache)[name = string("coreml_update_state_131_write_state")]; tensor coreml_update_state_131 = read_state(input = value_cache)[name = string("coreml_update_state_131")]; string value_cache_internal_tensor_assign_30_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_30_dtype_0"), val = string("fp32")]; tensor var_5566_begin_0 = const()[name = string("op_5566_begin_0"), val = tensor([29, 0, 0, 0, 0])]; tensor var_5566_end_0 = const()[name = string("op_5566_end_0"), val = tensor([30, 1, 2, 2048, 128])]; tensor var_5566_end_mask_0 = const()[name = string("op_5566_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5566_squeeze_mask_0 = const()[name = string("op_5566_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_30_1 = cast(dtype = key_cache_internal_tensor_assign_30_dtype_0, x = coreml_update_state_130)[name = string("cast_26")]; tensor var_5566 = slice_by_index(begin = var_5566_begin_0, end = var_5566_end_0, end_mask = var_5566_end_mask_0, squeeze_mask = var_5566_squeeze_mask_0, x = key_cache_internal_tensor_assign_30_1)[name = string("op_5566")]; tensor var_5569_begin_0 = const()[name = string("op_5569_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5569_end_mask_0 = const()[name = string("op_5569_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5569 = slice_by_index(begin = var_5569_begin_0, end = concat_11, end_mask = var_5569_end_mask_0, x = var_5566)[name = string("op_5569")]; tensor var_5571_begin_0 = const()[name = string("op_5571_begin_0"), val = tensor([29, 0, 0, 0, 0])]; tensor var_5571_end_0 = const()[name = string("op_5571_end_0"), val = tensor([30, 1, 2, 2048, 128])]; tensor var_5571_end_mask_0 = const()[name = string("op_5571_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5571_squeeze_mask_0 = const()[name = string("op_5571_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_30_1 = cast(dtype = value_cache_internal_tensor_assign_30_dtype_0, x = coreml_update_state_131)[name = string("cast_25")]; tensor var_5571 = slice_by_index(begin = var_5571_begin_0, end = var_5571_end_0, end_mask = var_5571_end_mask_0, squeeze_mask = var_5571_squeeze_mask_0, x = value_cache_internal_tensor_assign_30_1)[name = string("op_5571")]; tensor var_5574_begin_0 = const()[name = string("op_5574_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5574_end_mask_0 = const()[name = string("op_5574_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5574 = slice_by_index(begin = var_5574_begin_0, end = concat_11, end_mask = var_5574_end_mask_0, x = var_5571)[name = string("op_5574")]; tensor var_5576_shape = shape(x = var_5569)[name = string("op_5576_shape")]; int32 gather_535 = const()[name = string("gather_535"), val = int32(1)]; int32 gather_536 = const()[name = string("gather_536"), val = int32(2)]; int32 select_537 = const()[name = string("select_537"), val = int32(2)]; int32 gather_537_axis_0 = const()[name = string("gather_537_axis_0"), val = int32(0)]; int32 gather_537_batch_dims_0 = const()[name = string("gather_537_batch_dims_0"), val = int32(0)]; bool gather_537_validate_indices_0 = const()[name = string("gather_537_validate_indices_0"), val = bool(false)]; int32 gather_537 = gather(axis = gather_537_axis_0, batch_dims = gather_537_batch_dims_0, indices = select_537, validate_indices = gather_537_validate_indices_0, x = var_5576_shape)[name = string("gather_537")]; int32 gather_538 = const()[name = string("gather_538"), val = int32(128)]; tensor var_5583_axes_0 = const()[name = string("op_5583_axes_0"), val = tensor([2])]; tensor var_5583 = expand_dims(axes = var_5583_axes_0, x = var_5569)[name = string("op_5583")]; tensor shape_597 = shape(x = var_5583)[name = string("shape_597")]; int32 concat_564_axis_0 = const()[name = string("concat_564_axis_0"), val = int32(0)]; bool concat_564_interleave_0 = const()[name = string("concat_564_interleave_0"), val = bool(false)]; tensor concat_564 = concat(axis = concat_564_axis_0, interleave = concat_564_interleave_0, values = (gather_535, gather_536, var_100, gather_537, gather_538))[name = string("concat_564")]; tensor real_div_58 = real_div(x = concat_564, y = shape_597)[name = string("real_div_58")]; tensor hidden_states_881 = tile(reps = real_div_58, x = var_5583)[name = string("hidden_states_881")]; tensor concat_565x = const()[name = string("concat_565x"), val = tensor([1, 16, -1, 128])]; tensor key_states_119 = reshape(shape = concat_565x, x = hidden_states_881)[name = string("key_states_119")]; tensor var_5593_shape = shape(x = var_5574)[name = string("op_5593_shape")]; int32 gather_539 = const()[name = string("gather_539"), val = int32(1)]; int32 gather_540 = const()[name = string("gather_540"), val = int32(2)]; int32 select_541 = const()[name = string("select_541"), val = int32(2)]; int32 gather_541_axis_0 = const()[name = string("gather_541_axis_0"), val = int32(0)]; int32 gather_541_batch_dims_0 = const()[name = string("gather_541_batch_dims_0"), val = int32(0)]; bool gather_541_validate_indices_0 = const()[name = string("gather_541_validate_indices_0"), val = bool(false)]; int32 gather_541 = gather(axis = gather_541_axis_0, batch_dims = gather_541_batch_dims_0, indices = select_541, validate_indices = gather_541_validate_indices_0, x = var_5593_shape)[name = string("gather_541")]; int32 gather_542 = const()[name = string("gather_542"), val = int32(128)]; tensor var_5600_axes_0 = const()[name = string("op_5600_axes_0"), val = tensor([2])]; tensor var_5600 = expand_dims(axes = var_5600_axes_0, x = var_5574)[name = string("op_5600")]; tensor shape_602 = shape(x = var_5600)[name = string("shape_602")]; int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (gather_539, gather_540, var_100, gather_541, gather_542))[name = string("concat_566")]; tensor real_div_59 = real_div(x = concat_566, y = shape_602)[name = string("real_div_59")]; tensor hidden_states_885 = tile(reps = real_div_59, x = var_5600)[name = string("hidden_states_885")]; tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, 16, -1, 128])]; tensor value_states_119 = reshape(shape = concat_567x, x = hidden_states_885)[name = string("value_states_119")]; tensor var_5610_shape = shape(x = key_states_119)[name = string("op_5610_shape")]; int32 select_543 = const()[name = string("select_543"), val = int32(2)]; int32 gather_543_axis_0 = const()[name = string("gather_543_axis_0"), val = int32(0)]; int32 gather_543_batch_dims_0 = const()[name = string("gather_543_batch_dims_0"), val = int32(0)]; bool gather_543_validate_indices_0 = const()[name = string("gather_543_validate_indices_0"), val = bool(false)]; int32 gather_543 = gather(axis = gather_543_axis_0, batch_dims = gather_543_batch_dims_0, indices = select_543, validate_indices = gather_543_validate_indices_0, x = var_5610_shape)[name = string("gather_543")]; int32 concat_568_values0_0 = const()[name = string("concat_568_values0_0"), val = int32(1)]; int32 concat_568_values1_0 = const()[name = string("concat_568_values1_0"), val = int32(1)]; int32 concat_568_values2_0 = const()[name = string("concat_568_values2_0"), val = int32(0)]; int32 concat_568_axis_0 = const()[name = string("concat_568_axis_0"), val = int32(0)]; bool concat_568_interleave_0 = const()[name = string("concat_568_interleave_0"), val = bool(false)]; tensor concat_568 = concat(axis = concat_568_axis_0, interleave = concat_568_interleave_0, values = (concat_568_values0_0, concat_568_values1_0, concat_568_values2_0, gather_543))[name = string("concat_568")]; tensor causal_mask_61_begin_0 = const()[name = string("causal_mask_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_61_end_mask_0 = const()[name = string("causal_mask_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_61 = slice_by_index(begin = causal_mask_61_begin_0, end = concat_568, end_mask = causal_mask_61_end_mask_0, x = cast_0)[name = string("causal_mask_61")]; tensor attn_output_117 = scaled_dot_product_attention(attn_mask = causal_mask_61, key = key_states_119, query = query_states_119, value = value_states_119)[name = string("attn_output_117")]; tensor var_5616_perm_0 = const()[name = string("op_5616_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_569x = const()[name = string("concat_569x"), val = tensor([1, -1, 2048])]; tensor var_5616 = transpose(perm = var_5616_perm_0, x = attn_output_117)[name = string("transpose_24")]; tensor input_233 = reshape(shape = concat_569x, x = var_5616)[name = string("input_233")]; tensor linear_206 = linear(bias = linear_3_bias_0, weight = model_model_layers_29_self_attn_o_proj_weight_quantized, x = input_233)[name = string("linear_206")]; tensor hidden_states_889 = add(x = hidden_states_869, y = linear_206)[name = string("hidden_states_889")]; fp32 var_94_promoted_59 = const()[name = string("op_94_promoted_59"), val = fp32(0x1p+1)]; tensor var_5625 = pow(x = hidden_states_889, y = var_94_promoted_59)[name = string("op_5625")]; tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; tensor variance_119 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_5625)[name = string("variance_119")]; fp32 var_5628 = const()[name = string("op_5628"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5629 = add(x = variance_119, y = var_5628)[name = string("op_5629")]; fp32 var_5630_epsilon_0 = const()[name = string("op_5630_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5630 = rsqrt(epsilon = var_5630_epsilon_0, x = var_5629)[name = string("op_5630")]; tensor hidden_states_893 = mul(x = hidden_states_889, y = var_5630)[name = string("hidden_states_893")]; tensor input_235 = mul(x = model_model_layers_29_post_attention_layernorm_weight, y = hidden_states_893)[name = string("input_235")]; tensor linear_207 = linear(bias = linear_4_bias_0, weight = model_model_layers_29_mlp_gate_proj_weight_quantized, x = input_235)[name = string("linear_207")]; tensor var_5642 = silu(x = linear_207)[name = string("op_5642")]; tensor linear_208 = linear(bias = linear_4_bias_0, weight = model_model_layers_29_mlp_up_proj_weight_quantized, x = input_235)[name = string("linear_208")]; tensor input_239 = mul(x = var_5642, y = linear_208)[name = string("input_239")]; tensor linear_209 = linear(bias = linear_3_bias_0, weight = model_model_layers_29_mlp_down_proj_weight_quantized, x = input_239)[name = string("linear_209")]; tensor hidden_states_899 = add(x = hidden_states_889, y = linear_209)[name = string("hidden_states_899")]; fp32 var_94_promoted_60 = const()[name = string("op_94_promoted_60"), val = fp32(0x1p+1)]; tensor var_5655 = pow(x = hidden_states_899, y = var_94_promoted_60)[name = string("op_5655")]; tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; tensor variance_121 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_5655)[name = string("variance_121")]; fp32 var_5658 = const()[name = string("op_5658"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5659 = add(x = variance_121, y = var_5658)[name = string("op_5659")]; fp32 var_5660_epsilon_0 = const()[name = string("op_5660_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5660 = rsqrt(epsilon = var_5660_epsilon_0, x = var_5659)[name = string("op_5660")]; tensor hidden_states_903 = mul(x = hidden_states_899, y = var_5660)[name = string("hidden_states_903")]; tensor hidden_states_907 = mul(x = model_model_layers_30_input_layernorm_weight, y = hidden_states_903)[name = string("hidden_states_907")]; tensor linear_210 = linear(bias = model_model_layers_30_self_attn_q_proj_bias, weight = model_model_layers_30_self_attn_q_proj_weight_quantized, x = hidden_states_907)[name = string("linear_210")]; tensor linear_211 = linear(bias = model_model_layers_30_self_attn_k_proj_bias, weight = model_model_layers_30_self_attn_k_proj_weight_quantized, x = hidden_states_907)[name = string("linear_211")]; tensor linear_212 = linear(bias = model_model_layers_30_self_attn_v_proj_bias, weight = model_model_layers_30_self_attn_v_proj_weight_quantized, x = hidden_states_907)[name = string("linear_212")]; tensor concat_570x = const()[name = string("concat_570x"), val = tensor([1, -1, 16, 128])]; tensor var_5683 = reshape(shape = concat_570x, x = linear_210)[name = string("op_5683")]; tensor q_61_perm_0 = const()[name = string("q_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_571x = const()[name = string("concat_571x"), val = tensor([1, -1, 2, 128])]; tensor var_5686 = reshape(shape = concat_571x, x = linear_211)[name = string("op_5686")]; tensor k_61_perm_0 = const()[name = string("k_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_572x = const()[name = string("concat_572x"), val = tensor([1, -1, 2, 128])]; tensor var_5689 = reshape(shape = concat_572x, x = linear_212)[name = string("op_5689")]; tensor v_state_61_perm_0 = const()[name = string("v_state_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_61 = transpose(perm = q_61_perm_0, x = var_5683)[name = string("transpose_23")]; tensor var_5693 = mul(x = q_61, y = cos_7)[name = string("op_5693")]; tensor x1_121_begin_0 = const()[name = string("x1_121_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_121_end_0 = const()[name = string("x1_121_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_121_end_mask_0 = const()[name = string("x1_121_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_121 = slice_by_index(begin = x1_121_begin_0, end = x1_121_end_0, end_mask = x1_121_end_mask_0, x = q_61)[name = string("x1_121")]; tensor x2_121_begin_0 = const()[name = string("x2_121_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_121_end_0 = const()[name = string("x2_121_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_121_end_mask_0 = const()[name = string("x2_121_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_121 = slice_by_index(begin = x2_121_begin_0, end = x2_121_end_0, end_mask = x2_121_end_mask_0, x = q_61)[name = string("x2_121")]; fp32 const_63_promoted = const()[name = string("const_63_promoted"), val = fp32(-0x1p+0)]; tensor var_5704 = mul(x = x2_121, y = const_63_promoted)[name = string("op_5704")]; bool var_5706_interleave_0 = const()[name = string("op_5706_interleave_0"), val = bool(false)]; tensor var_5706 = concat(axis = var_88, interleave = var_5706_interleave_0, values = (var_5704, x1_121))[name = string("op_5706")]; tensor var_5707 = mul(x = var_5706, y = sin_7)[name = string("op_5707")]; tensor query_states_123 = add(x = var_5693, y = var_5707)[name = string("query_states_123")]; tensor k_61 = transpose(perm = k_61_perm_0, x = var_5686)[name = string("transpose_22")]; tensor var_5709 = mul(x = k_61, y = cos_7)[name = string("op_5709")]; tensor x1_123_begin_0 = const()[name = string("x1_123_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_123_end_0 = const()[name = string("x1_123_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_123_end_mask_0 = const()[name = string("x1_123_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_123 = slice_by_index(begin = x1_123_begin_0, end = x1_123_end_0, end_mask = x1_123_end_mask_0, x = k_61)[name = string("x1_123")]; tensor x2_123_begin_0 = const()[name = string("x2_123_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_123_end_0 = const()[name = string("x2_123_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_123_end_mask_0 = const()[name = string("x2_123_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_123 = slice_by_index(begin = x2_123_begin_0, end = x2_123_end_0, end_mask = x2_123_end_mask_0, x = k_61)[name = string("x2_123")]; fp32 const_64_promoted = const()[name = string("const_64_promoted"), val = fp32(-0x1p+0)]; tensor var_5720 = mul(x = x2_123, y = const_64_promoted)[name = string("op_5720")]; bool var_5722_interleave_0 = const()[name = string("op_5722_interleave_0"), val = bool(false)]; tensor var_5722 = concat(axis = var_88, interleave = var_5722_interleave_0, values = (var_5720, x1_123))[name = string("op_5722")]; tensor var_5723 = mul(x = var_5722, y = sin_7)[name = string("op_5723")]; tensor k_state_61 = add(x = var_5709, y = var_5723)[name = string("k_state_61")]; tensor expand_dims_360 = const()[name = string("expand_dims_360"), val = tensor([0])]; tensor expand_dims_361 = const()[name = string("expand_dims_361"), val = tensor([0])]; tensor expand_dims_363 = const()[name = string("expand_dims_363"), val = tensor([0])]; tensor concat_575_values0_0 = const()[name = string("concat_575_values0_0"), val = tensor([30])]; int32 concat_575_axis_0 = const()[name = string("concat_575_axis_0"), val = int32(0)]; bool concat_575_interleave_0 = const()[name = string("concat_575_interleave_0"), val = bool(false)]; tensor concat_575 = concat(axis = concat_575_axis_0, interleave = concat_575_interleave_0, values = (concat_575_values0_0, expand_dims_360, expand_dims_361, expand_dims_2, expand_dims_363))[name = string("concat_575")]; tensor key_cache_internal_tensor_assign_31_stride_0 = const()[name = string("key_cache_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_31_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_31_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_31 = slice_update(begin = concat_575, begin_mask = key_cache_internal_tensor_assign_31_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_31_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_31_squeeze_mask_0, stride = key_cache_internal_tensor_assign_31_stride_0, update = k_state_61, x = key_cache_internal_tensor_assign_30_1)[name = string("key_cache_internal_tensor_assign_31")]; string cast_433_dtype_0 = const()[name = string("cast_433_dtype_0"), val = string("fp16")]; tensor cast_433 = cast(dtype = cast_433_dtype_0, x = key_cache_internal_tensor_assign_31)[name = string("cast_24")]; write_state(data = cast_433, input = key_cache)[name = string("coreml_update_state_132_write_state")]; tensor coreml_update_state_132 = read_state(input = key_cache)[name = string("coreml_update_state_132")]; string key_cache_internal_tensor_assign_31_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_31_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_31_stride_0 = const()[name = string("value_cache_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_31_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_31_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_61 = transpose(perm = v_state_61_perm_0, x = var_5689)[name = string("transpose_21")]; tensor value_cache_internal_tensor_assign_31 = slice_update(begin = concat_575, begin_mask = value_cache_internal_tensor_assign_31_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_31_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_31_squeeze_mask_0, stride = value_cache_internal_tensor_assign_31_stride_0, update = v_state_61, x = value_cache_internal_tensor_assign_30_1)[name = string("value_cache_internal_tensor_assign_31")]; string cast_434_dtype_0 = const()[name = string("cast_434_dtype_0"), val = string("fp16")]; tensor cast_434 = cast(dtype = cast_434_dtype_0, x = value_cache_internal_tensor_assign_31)[name = string("cast_23")]; write_state(data = cast_434, input = value_cache)[name = string("coreml_update_state_133_write_state")]; tensor coreml_update_state_133 = read_state(input = value_cache)[name = string("coreml_update_state_133")]; string value_cache_internal_tensor_assign_31_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_31_dtype_0"), val = string("fp32")]; tensor var_5746_begin_0 = const()[name = string("op_5746_begin_0"), val = tensor([30, 0, 0, 0, 0])]; tensor var_5746_end_0 = const()[name = string("op_5746_end_0"), val = tensor([31, 1, 2, 2048, 128])]; tensor var_5746_end_mask_0 = const()[name = string("op_5746_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5746_squeeze_mask_0 = const()[name = string("op_5746_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_31_1 = cast(dtype = key_cache_internal_tensor_assign_31_dtype_0, x = coreml_update_state_132)[name = string("cast_22")]; tensor var_5746 = slice_by_index(begin = var_5746_begin_0, end = var_5746_end_0, end_mask = var_5746_end_mask_0, squeeze_mask = var_5746_squeeze_mask_0, x = key_cache_internal_tensor_assign_31_1)[name = string("op_5746")]; tensor var_5749_begin_0 = const()[name = string("op_5749_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5749_end_mask_0 = const()[name = string("op_5749_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5749 = slice_by_index(begin = var_5749_begin_0, end = concat_11, end_mask = var_5749_end_mask_0, x = var_5746)[name = string("op_5749")]; tensor var_5751_begin_0 = const()[name = string("op_5751_begin_0"), val = tensor([30, 0, 0, 0, 0])]; tensor var_5751_end_0 = const()[name = string("op_5751_end_0"), val = tensor([31, 1, 2, 2048, 128])]; tensor var_5751_end_mask_0 = const()[name = string("op_5751_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5751_squeeze_mask_0 = const()[name = string("op_5751_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_31_1 = cast(dtype = value_cache_internal_tensor_assign_31_dtype_0, x = coreml_update_state_133)[name = string("cast_21")]; tensor var_5751 = slice_by_index(begin = var_5751_begin_0, end = var_5751_end_0, end_mask = var_5751_end_mask_0, squeeze_mask = var_5751_squeeze_mask_0, x = value_cache_internal_tensor_assign_31_1)[name = string("op_5751")]; tensor var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5754 = slice_by_index(begin = var_5754_begin_0, end = concat_11, end_mask = var_5754_end_mask_0, x = var_5751)[name = string("op_5754")]; tensor var_5756_shape = shape(x = var_5749)[name = string("op_5756_shape")]; int32 gather_553 = const()[name = string("gather_553"), val = int32(1)]; int32 gather_554 = const()[name = string("gather_554"), val = int32(2)]; int32 select_555 = const()[name = string("select_555"), val = int32(2)]; int32 gather_555_axis_0 = const()[name = string("gather_555_axis_0"), val = int32(0)]; int32 gather_555_batch_dims_0 = const()[name = string("gather_555_batch_dims_0"), val = int32(0)]; bool gather_555_validate_indices_0 = const()[name = string("gather_555_validate_indices_0"), val = bool(false)]; int32 gather_555 = gather(axis = gather_555_axis_0, batch_dims = gather_555_batch_dims_0, indices = select_555, validate_indices = gather_555_validate_indices_0, x = var_5756_shape)[name = string("gather_555")]; int32 gather_556 = const()[name = string("gather_556"), val = int32(128)]; tensor var_5763_axes_0 = const()[name = string("op_5763_axes_0"), val = tensor([2])]; tensor var_5763 = expand_dims(axes = var_5763_axes_0, x = var_5749)[name = string("op_5763")]; tensor shape_617 = shape(x = var_5763)[name = string("shape_617")]; int32 concat_583_axis_0 = const()[name = string("concat_583_axis_0"), val = int32(0)]; bool concat_583_interleave_0 = const()[name = string("concat_583_interleave_0"), val = bool(false)]; tensor concat_583 = concat(axis = concat_583_axis_0, interleave = concat_583_interleave_0, values = (gather_553, gather_554, var_100, gather_555, gather_556))[name = string("concat_583")]; tensor real_div_60 = real_div(x = concat_583, y = shape_617)[name = string("real_div_60")]; tensor hidden_states_911 = tile(reps = real_div_60, x = var_5763)[name = string("hidden_states_911")]; tensor concat_584x = const()[name = string("concat_584x"), val = tensor([1, 16, -1, 128])]; tensor key_states_123 = reshape(shape = concat_584x, x = hidden_states_911)[name = string("key_states_123")]; tensor var_5773_shape = shape(x = var_5754)[name = string("op_5773_shape")]; int32 gather_557 = const()[name = string("gather_557"), val = int32(1)]; int32 gather_558 = const()[name = string("gather_558"), val = int32(2)]; int32 select_559 = const()[name = string("select_559"), val = int32(2)]; int32 gather_559_axis_0 = const()[name = string("gather_559_axis_0"), val = int32(0)]; int32 gather_559_batch_dims_0 = const()[name = string("gather_559_batch_dims_0"), val = int32(0)]; bool gather_559_validate_indices_0 = const()[name = string("gather_559_validate_indices_0"), val = bool(false)]; int32 gather_559 = gather(axis = gather_559_axis_0, batch_dims = gather_559_batch_dims_0, indices = select_559, validate_indices = gather_559_validate_indices_0, x = var_5773_shape)[name = string("gather_559")]; int32 gather_560 = const()[name = string("gather_560"), val = int32(128)]; tensor var_5780_axes_0 = const()[name = string("op_5780_axes_0"), val = tensor([2])]; tensor var_5780 = expand_dims(axes = var_5780_axes_0, x = var_5754)[name = string("op_5780")]; tensor shape_622 = shape(x = var_5780)[name = string("shape_622")]; int32 concat_585_axis_0 = const()[name = string("concat_585_axis_0"), val = int32(0)]; bool concat_585_interleave_0 = const()[name = string("concat_585_interleave_0"), val = bool(false)]; tensor concat_585 = concat(axis = concat_585_axis_0, interleave = concat_585_interleave_0, values = (gather_557, gather_558, var_100, gather_559, gather_560))[name = string("concat_585")]; tensor real_div_61 = real_div(x = concat_585, y = shape_622)[name = string("real_div_61")]; tensor hidden_states_915 = tile(reps = real_div_61, x = var_5780)[name = string("hidden_states_915")]; tensor concat_586x = const()[name = string("concat_586x"), val = tensor([1, 16, -1, 128])]; tensor value_states_123 = reshape(shape = concat_586x, x = hidden_states_915)[name = string("value_states_123")]; tensor var_5790_shape = shape(x = key_states_123)[name = string("op_5790_shape")]; int32 select_561 = const()[name = string("select_561"), val = int32(2)]; int32 gather_561_axis_0 = const()[name = string("gather_561_axis_0"), val = int32(0)]; int32 gather_561_batch_dims_0 = const()[name = string("gather_561_batch_dims_0"), val = int32(0)]; bool gather_561_validate_indices_0 = const()[name = string("gather_561_validate_indices_0"), val = bool(false)]; int32 gather_561 = gather(axis = gather_561_axis_0, batch_dims = gather_561_batch_dims_0, indices = select_561, validate_indices = gather_561_validate_indices_0, x = var_5790_shape)[name = string("gather_561")]; int32 concat_587_values0_0 = const()[name = string("concat_587_values0_0"), val = int32(1)]; int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(1)]; int32 concat_587_values2_0 = const()[name = string("concat_587_values2_0"), val = int32(0)]; int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)]; bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)]; tensor concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (concat_587_values0_0, concat_587_values1_0, concat_587_values2_0, gather_561))[name = string("concat_587")]; tensor causal_mask_63_begin_0 = const()[name = string("causal_mask_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_63_end_mask_0 = const()[name = string("causal_mask_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_63 = slice_by_index(begin = causal_mask_63_begin_0, end = concat_587, end_mask = causal_mask_63_end_mask_0, x = cast_0)[name = string("causal_mask_63")]; tensor attn_output_121 = scaled_dot_product_attention(attn_mask = causal_mask_63, key = key_states_123, query = query_states_123, value = value_states_123)[name = string("attn_output_121")]; tensor var_5796_perm_0 = const()[name = string("op_5796_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_588x = const()[name = string("concat_588x"), val = tensor([1, -1, 2048])]; tensor var_5796 = transpose(perm = var_5796_perm_0, x = attn_output_121)[name = string("transpose_20")]; tensor input_241 = reshape(shape = concat_588x, x = var_5796)[name = string("input_241")]; tensor linear_213 = linear(bias = linear_3_bias_0, weight = model_model_layers_30_self_attn_o_proj_weight_quantized, x = input_241)[name = string("linear_213")]; tensor hidden_states_919 = add(x = hidden_states_899, y = linear_213)[name = string("hidden_states_919")]; fp32 var_94_promoted_61 = const()[name = string("op_94_promoted_61"), val = fp32(0x1p+1)]; tensor var_5805 = pow(x = hidden_states_919, y = var_94_promoted_61)[name = string("op_5805")]; tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([-1])]; bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; tensor variance_123 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = var_5805)[name = string("variance_123")]; fp32 var_5808 = const()[name = string("op_5808"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5809 = add(x = variance_123, y = var_5808)[name = string("op_5809")]; fp32 var_5810_epsilon_0 = const()[name = string("op_5810_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5810 = rsqrt(epsilon = var_5810_epsilon_0, x = var_5809)[name = string("op_5810")]; tensor hidden_states_923 = mul(x = hidden_states_919, y = var_5810)[name = string("hidden_states_923")]; tensor input_243 = mul(x = model_model_layers_30_post_attention_layernorm_weight, y = hidden_states_923)[name = string("input_243")]; tensor linear_214 = linear(bias = linear_4_bias_0, weight = model_model_layers_30_mlp_gate_proj_weight_quantized, x = input_243)[name = string("linear_214")]; tensor var_5822 = silu(x = linear_214)[name = string("op_5822")]; tensor linear_215 = linear(bias = linear_4_bias_0, weight = model_model_layers_30_mlp_up_proj_weight_quantized, x = input_243)[name = string("linear_215")]; tensor input_247 = mul(x = var_5822, y = linear_215)[name = string("input_247")]; tensor linear_216 = linear(bias = linear_3_bias_0, weight = model_model_layers_30_mlp_down_proj_weight_quantized, x = input_247)[name = string("linear_216")]; tensor hidden_states_929 = add(x = hidden_states_919, y = linear_216)[name = string("hidden_states_929")]; fp32 var_94_promoted_62 = const()[name = string("op_94_promoted_62"), val = fp32(0x1p+1)]; tensor var_5835 = pow(x = hidden_states_929, y = var_94_promoted_62)[name = string("op_5835")]; tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([-1])]; bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; tensor variance_125 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = var_5835)[name = string("variance_125")]; fp32 var_5838 = const()[name = string("op_5838"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5839 = add(x = variance_125, y = var_5838)[name = string("op_5839")]; fp32 var_5840_epsilon_0 = const()[name = string("op_5840_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5840 = rsqrt(epsilon = var_5840_epsilon_0, x = var_5839)[name = string("op_5840")]; tensor hidden_states_933 = mul(x = hidden_states_929, y = var_5840)[name = string("hidden_states_933")]; tensor hidden_states_937 = mul(x = model_model_layers_31_input_layernorm_weight, y = hidden_states_933)[name = string("hidden_states_937")]; tensor linear_217 = linear(bias = model_model_layers_31_self_attn_q_proj_bias, weight = model_model_layers_31_self_attn_q_proj_weight_quantized, x = hidden_states_937)[name = string("linear_217")]; tensor linear_218 = linear(bias = model_model_layers_31_self_attn_k_proj_bias, weight = model_model_layers_31_self_attn_k_proj_weight_quantized, x = hidden_states_937)[name = string("linear_218")]; tensor linear_219 = linear(bias = model_model_layers_31_self_attn_v_proj_bias, weight = model_model_layers_31_self_attn_v_proj_weight_quantized, x = hidden_states_937)[name = string("linear_219")]; tensor concat_589x = const()[name = string("concat_589x"), val = tensor([1, -1, 16, 128])]; tensor var_5863 = reshape(shape = concat_589x, x = linear_217)[name = string("op_5863")]; tensor q_63_perm_0 = const()[name = string("q_63_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_590x = const()[name = string("concat_590x"), val = tensor([1, -1, 2, 128])]; tensor var_5866 = reshape(shape = concat_590x, x = linear_218)[name = string("op_5866")]; tensor k_63_perm_0 = const()[name = string("k_63_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_591x = const()[name = string("concat_591x"), val = tensor([1, -1, 2, 128])]; tensor var_5869 = reshape(shape = concat_591x, x = linear_219)[name = string("op_5869")]; tensor v_state_63_perm_0 = const()[name = string("v_state_63_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_63 = transpose(perm = q_63_perm_0, x = var_5863)[name = string("transpose_19")]; tensor var_5873 = mul(x = q_63, y = cos_7)[name = string("op_5873")]; tensor x1_125_begin_0 = const()[name = string("x1_125_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_125_end_0 = const()[name = string("x1_125_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_125_end_mask_0 = const()[name = string("x1_125_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_125 = slice_by_index(begin = x1_125_begin_0, end = x1_125_end_0, end_mask = x1_125_end_mask_0, x = q_63)[name = string("x1_125")]; tensor x2_125_begin_0 = const()[name = string("x2_125_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_125_end_0 = const()[name = string("x2_125_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_125_end_mask_0 = const()[name = string("x2_125_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_125 = slice_by_index(begin = x2_125_begin_0, end = x2_125_end_0, end_mask = x2_125_end_mask_0, x = q_63)[name = string("x2_125")]; fp32 const_65_promoted = const()[name = string("const_65_promoted"), val = fp32(-0x1p+0)]; tensor var_5884 = mul(x = x2_125, y = const_65_promoted)[name = string("op_5884")]; bool var_5886_interleave_0 = const()[name = string("op_5886_interleave_0"), val = bool(false)]; tensor var_5886 = concat(axis = var_88, interleave = var_5886_interleave_0, values = (var_5884, x1_125))[name = string("op_5886")]; tensor var_5887 = mul(x = var_5886, y = sin_7)[name = string("op_5887")]; tensor query_states_127 = add(x = var_5873, y = var_5887)[name = string("query_states_127")]; tensor k_63 = transpose(perm = k_63_perm_0, x = var_5866)[name = string("transpose_18")]; tensor var_5889 = mul(x = k_63, y = cos_7)[name = string("op_5889")]; tensor x1_127_begin_0 = const()[name = string("x1_127_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_127_end_0 = const()[name = string("x1_127_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_127_end_mask_0 = const()[name = string("x1_127_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_127 = slice_by_index(begin = x1_127_begin_0, end = x1_127_end_0, end_mask = x1_127_end_mask_0, x = k_63)[name = string("x1_127")]; tensor x2_127_begin_0 = const()[name = string("x2_127_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_127_end_0 = const()[name = string("x2_127_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_127_end_mask_0 = const()[name = string("x2_127_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_127 = slice_by_index(begin = x2_127_begin_0, end = x2_127_end_0, end_mask = x2_127_end_mask_0, x = k_63)[name = string("x2_127")]; fp32 const_66_promoted = const()[name = string("const_66_promoted"), val = fp32(-0x1p+0)]; tensor var_5900 = mul(x = x2_127, y = const_66_promoted)[name = string("op_5900")]; bool var_5902_interleave_0 = const()[name = string("op_5902_interleave_0"), val = bool(false)]; tensor var_5902 = concat(axis = var_88, interleave = var_5902_interleave_0, values = (var_5900, x1_127))[name = string("op_5902")]; tensor var_5903 = mul(x = var_5902, y = sin_7)[name = string("op_5903")]; tensor k_state_63 = add(x = var_5889, y = var_5903)[name = string("k_state_63")]; tensor expand_dims_372 = const()[name = string("expand_dims_372"), val = tensor([0])]; tensor expand_dims_373 = const()[name = string("expand_dims_373"), val = tensor([0])]; tensor expand_dims_375 = const()[name = string("expand_dims_375"), val = tensor([0])]; tensor concat_594_values0_0 = const()[name = string("concat_594_values0_0"), val = tensor([31])]; int32 concat_594_axis_0 = const()[name = string("concat_594_axis_0"), val = int32(0)]; bool concat_594_interleave_0 = const()[name = string("concat_594_interleave_0"), val = bool(false)]; tensor concat_594 = concat(axis = concat_594_axis_0, interleave = concat_594_interleave_0, values = (concat_594_values0_0, expand_dims_372, expand_dims_373, expand_dims_2, expand_dims_375))[name = string("concat_594")]; tensor key_cache_internal_tensor_assign_32_stride_0 = const()[name = string("key_cache_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_32_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_32_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_32 = slice_update(begin = concat_594, begin_mask = key_cache_internal_tensor_assign_32_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_32_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_32_squeeze_mask_0, stride = key_cache_internal_tensor_assign_32_stride_0, update = k_state_63, x = key_cache_internal_tensor_assign_31_1)[name = string("key_cache_internal_tensor_assign_32")]; string cast_447_dtype_0 = const()[name = string("cast_447_dtype_0"), val = string("fp16")]; tensor cast_447 = cast(dtype = cast_447_dtype_0, x = key_cache_internal_tensor_assign_32)[name = string("cast_20")]; write_state(data = cast_447, input = key_cache)[name = string("coreml_update_state_134_write_state")]; tensor coreml_update_state_134 = read_state(input = key_cache)[name = string("coreml_update_state_134")]; string key_cache_internal_tensor_assign_32_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_32_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_32_stride_0 = const()[name = string("value_cache_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_32_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_32_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_63 = transpose(perm = v_state_63_perm_0, x = var_5869)[name = string("transpose_17")]; tensor value_cache_internal_tensor_assign_32 = slice_update(begin = concat_594, begin_mask = value_cache_internal_tensor_assign_32_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_32_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_32_squeeze_mask_0, stride = value_cache_internal_tensor_assign_32_stride_0, update = v_state_63, x = value_cache_internal_tensor_assign_31_1)[name = string("value_cache_internal_tensor_assign_32")]; string cast_448_dtype_0 = const()[name = string("cast_448_dtype_0"), val = string("fp16")]; tensor cast_448 = cast(dtype = cast_448_dtype_0, x = value_cache_internal_tensor_assign_32)[name = string("cast_19")]; write_state(data = cast_448, input = value_cache)[name = string("coreml_update_state_135_write_state")]; tensor coreml_update_state_135 = read_state(input = value_cache)[name = string("coreml_update_state_135")]; string value_cache_internal_tensor_assign_32_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_32_dtype_0"), val = string("fp32")]; tensor var_5926_begin_0 = const()[name = string("op_5926_begin_0"), val = tensor([31, 0, 0, 0, 0])]; tensor var_5926_end_0 = const()[name = string("op_5926_end_0"), val = tensor([32, 1, 2, 2048, 128])]; tensor var_5926_end_mask_0 = const()[name = string("op_5926_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5926_squeeze_mask_0 = const()[name = string("op_5926_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_32_1 = cast(dtype = key_cache_internal_tensor_assign_32_dtype_0, x = coreml_update_state_134)[name = string("cast_18")]; tensor var_5926 = slice_by_index(begin = var_5926_begin_0, end = var_5926_end_0, end_mask = var_5926_end_mask_0, squeeze_mask = var_5926_squeeze_mask_0, x = key_cache_internal_tensor_assign_32_1)[name = string("op_5926")]; tensor var_5929_begin_0 = const()[name = string("op_5929_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5929_end_mask_0 = const()[name = string("op_5929_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5929 = slice_by_index(begin = var_5929_begin_0, end = concat_11, end_mask = var_5929_end_mask_0, x = var_5926)[name = string("op_5929")]; tensor var_5931_begin_0 = const()[name = string("op_5931_begin_0"), val = tensor([31, 0, 0, 0, 0])]; tensor var_5931_end_0 = const()[name = string("op_5931_end_0"), val = tensor([32, 1, 2, 2048, 128])]; tensor var_5931_end_mask_0 = const()[name = string("op_5931_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5931_squeeze_mask_0 = const()[name = string("op_5931_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_32_1 = cast(dtype = value_cache_internal_tensor_assign_32_dtype_0, x = coreml_update_state_135)[name = string("cast_17")]; tensor var_5931 = slice_by_index(begin = var_5931_begin_0, end = var_5931_end_0, end_mask = var_5931_end_mask_0, squeeze_mask = var_5931_squeeze_mask_0, x = value_cache_internal_tensor_assign_32_1)[name = string("op_5931")]; tensor var_5934_begin_0 = const()[name = string("op_5934_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5934_end_mask_0 = const()[name = string("op_5934_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5934 = slice_by_index(begin = var_5934_begin_0, end = concat_11, end_mask = var_5934_end_mask_0, x = var_5931)[name = string("op_5934")]; tensor var_5936_shape = shape(x = var_5929)[name = string("op_5936_shape")]; int32 gather_571 = const()[name = string("gather_571"), val = int32(1)]; int32 gather_572 = const()[name = string("gather_572"), val = int32(2)]; int32 select_573 = const()[name = string("select_573"), val = int32(2)]; int32 gather_573_axis_0 = const()[name = string("gather_573_axis_0"), val = int32(0)]; int32 gather_573_batch_dims_0 = const()[name = string("gather_573_batch_dims_0"), val = int32(0)]; bool gather_573_validate_indices_0 = const()[name = string("gather_573_validate_indices_0"), val = bool(false)]; int32 gather_573 = gather(axis = gather_573_axis_0, batch_dims = gather_573_batch_dims_0, indices = select_573, validate_indices = gather_573_validate_indices_0, x = var_5936_shape)[name = string("gather_573")]; int32 gather_574 = const()[name = string("gather_574"), val = int32(128)]; tensor var_5943_axes_0 = const()[name = string("op_5943_axes_0"), val = tensor([2])]; tensor var_5943 = expand_dims(axes = var_5943_axes_0, x = var_5929)[name = string("op_5943")]; tensor shape_637 = shape(x = var_5943)[name = string("shape_637")]; int32 concat_602_axis_0 = const()[name = string("concat_602_axis_0"), val = int32(0)]; bool concat_602_interleave_0 = const()[name = string("concat_602_interleave_0"), val = bool(false)]; tensor concat_602 = concat(axis = concat_602_axis_0, interleave = concat_602_interleave_0, values = (gather_571, gather_572, var_100, gather_573, gather_574))[name = string("concat_602")]; tensor real_div_62 = real_div(x = concat_602, y = shape_637)[name = string("real_div_62")]; tensor hidden_states_941 = tile(reps = real_div_62, x = var_5943)[name = string("hidden_states_941")]; tensor concat_603x = const()[name = string("concat_603x"), val = tensor([1, 16, -1, 128])]; tensor key_states_127 = reshape(shape = concat_603x, x = hidden_states_941)[name = string("key_states_127")]; tensor var_5953_shape = shape(x = var_5934)[name = string("op_5953_shape")]; int32 gather_575 = const()[name = string("gather_575"), val = int32(1)]; int32 gather_576 = const()[name = string("gather_576"), val = int32(2)]; int32 select_577 = const()[name = string("select_577"), val = int32(2)]; int32 gather_577_axis_0 = const()[name = string("gather_577_axis_0"), val = int32(0)]; int32 gather_577_batch_dims_0 = const()[name = string("gather_577_batch_dims_0"), val = int32(0)]; bool gather_577_validate_indices_0 = const()[name = string("gather_577_validate_indices_0"), val = bool(false)]; int32 gather_577 = gather(axis = gather_577_axis_0, batch_dims = gather_577_batch_dims_0, indices = select_577, validate_indices = gather_577_validate_indices_0, x = var_5953_shape)[name = string("gather_577")]; int32 gather_578 = const()[name = string("gather_578"), val = int32(128)]; tensor var_5960_axes_0 = const()[name = string("op_5960_axes_0"), val = tensor([2])]; tensor var_5960 = expand_dims(axes = var_5960_axes_0, x = var_5934)[name = string("op_5960")]; tensor shape_642 = shape(x = var_5960)[name = string("shape_642")]; int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)]; bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)]; tensor concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (gather_575, gather_576, var_100, gather_577, gather_578))[name = string("concat_604")]; tensor real_div_63 = real_div(x = concat_604, y = shape_642)[name = string("real_div_63")]; tensor hidden_states_945 = tile(reps = real_div_63, x = var_5960)[name = string("hidden_states_945")]; tensor concat_605x = const()[name = string("concat_605x"), val = tensor([1, 16, -1, 128])]; tensor value_states_127 = reshape(shape = concat_605x, x = hidden_states_945)[name = string("value_states_127")]; tensor var_5970_shape = shape(x = key_states_127)[name = string("op_5970_shape")]; int32 select_579 = const()[name = string("select_579"), val = int32(2)]; int32 gather_579_axis_0 = const()[name = string("gather_579_axis_0"), val = int32(0)]; int32 gather_579_batch_dims_0 = const()[name = string("gather_579_batch_dims_0"), val = int32(0)]; bool gather_579_validate_indices_0 = const()[name = string("gather_579_validate_indices_0"), val = bool(false)]; int32 gather_579 = gather(axis = gather_579_axis_0, batch_dims = gather_579_batch_dims_0, indices = select_579, validate_indices = gather_579_validate_indices_0, x = var_5970_shape)[name = string("gather_579")]; int32 concat_606_values0_0 = const()[name = string("concat_606_values0_0"), val = int32(1)]; int32 concat_606_values1_0 = const()[name = string("concat_606_values1_0"), val = int32(1)]; int32 concat_606_values2_0 = const()[name = string("concat_606_values2_0"), val = int32(0)]; int32 concat_606_axis_0 = const()[name = string("concat_606_axis_0"), val = int32(0)]; bool concat_606_interleave_0 = const()[name = string("concat_606_interleave_0"), val = bool(false)]; tensor concat_606 = concat(axis = concat_606_axis_0, interleave = concat_606_interleave_0, values = (concat_606_values0_0, concat_606_values1_0, concat_606_values2_0, gather_579))[name = string("concat_606")]; tensor causal_mask_65_begin_0 = const()[name = string("causal_mask_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_65_end_mask_0 = const()[name = string("causal_mask_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_65 = slice_by_index(begin = causal_mask_65_begin_0, end = concat_606, end_mask = causal_mask_65_end_mask_0, x = cast_0)[name = string("causal_mask_65")]; tensor attn_output_125 = scaled_dot_product_attention(attn_mask = causal_mask_65, key = key_states_127, query = query_states_127, value = value_states_127)[name = string("attn_output_125")]; tensor var_5976_perm_0 = const()[name = string("op_5976_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_607x = const()[name = string("concat_607x"), val = tensor([1, -1, 2048])]; tensor var_5976 = transpose(perm = var_5976_perm_0, x = attn_output_125)[name = string("transpose_16")]; tensor input_249 = reshape(shape = concat_607x, x = var_5976)[name = string("input_249")]; tensor linear_220 = linear(bias = linear_3_bias_0, weight = model_model_layers_31_self_attn_o_proj_weight_quantized, x = input_249)[name = string("linear_220")]; tensor hidden_states_949 = add(x = hidden_states_929, y = linear_220)[name = string("hidden_states_949")]; fp32 var_94_promoted_63 = const()[name = string("op_94_promoted_63"), val = fp32(0x1p+1)]; tensor var_5985 = pow(x = hidden_states_949, y = var_94_promoted_63)[name = string("op_5985")]; tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; tensor variance_127 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_5985)[name = string("variance_127")]; fp32 var_5988 = const()[name = string("op_5988"), val = fp32(0x1.0c6f7ap-20)]; tensor var_5989 = add(x = variance_127, y = var_5988)[name = string("op_5989")]; fp32 var_5990_epsilon_0 = const()[name = string("op_5990_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5990 = rsqrt(epsilon = var_5990_epsilon_0, x = var_5989)[name = string("op_5990")]; tensor hidden_states_953 = mul(x = hidden_states_949, y = var_5990)[name = string("hidden_states_953")]; tensor input_251 = mul(x = model_model_layers_31_post_attention_layernorm_weight, y = hidden_states_953)[name = string("input_251")]; tensor linear_221 = linear(bias = linear_4_bias_0, weight = model_model_layers_31_mlp_gate_proj_weight_quantized, x = input_251)[name = string("linear_221")]; tensor var_6002 = silu(x = linear_221)[name = string("op_6002")]; tensor linear_222 = linear(bias = linear_4_bias_0, weight = model_model_layers_31_mlp_up_proj_weight_quantized, x = input_251)[name = string("linear_222")]; tensor input_255 = mul(x = var_6002, y = linear_222)[name = string("input_255")]; tensor linear_223 = linear(bias = linear_3_bias_0, weight = model_model_layers_31_mlp_down_proj_weight_quantized, x = input_255)[name = string("linear_223")]; tensor hidden_states_959 = add(x = hidden_states_949, y = linear_223)[name = string("hidden_states_959")]; fp32 var_94_promoted_64 = const()[name = string("op_94_promoted_64"), val = fp32(0x1p+1)]; tensor var_6015 = pow(x = hidden_states_959, y = var_94_promoted_64)[name = string("op_6015")]; tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([-1])]; bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; tensor variance_129 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = var_6015)[name = string("variance_129")]; fp32 var_6018 = const()[name = string("op_6018"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6019 = add(x = variance_129, y = var_6018)[name = string("op_6019")]; fp32 var_6020_epsilon_0 = const()[name = string("op_6020_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6020 = rsqrt(epsilon = var_6020_epsilon_0, x = var_6019)[name = string("op_6020")]; tensor hidden_states_963 = mul(x = hidden_states_959, y = var_6020)[name = string("hidden_states_963")]; tensor hidden_states_967 = mul(x = model_model_layers_32_input_layernorm_weight, y = hidden_states_963)[name = string("hidden_states_967")]; tensor linear_224 = linear(bias = model_model_layers_32_self_attn_q_proj_bias, weight = model_model_layers_32_self_attn_q_proj_weight_quantized, x = hidden_states_967)[name = string("linear_224")]; tensor linear_225 = linear(bias = model_model_layers_32_self_attn_k_proj_bias, weight = model_model_layers_32_self_attn_k_proj_weight_quantized, x = hidden_states_967)[name = string("linear_225")]; tensor linear_226 = linear(bias = model_model_layers_32_self_attn_v_proj_bias, weight = model_model_layers_32_self_attn_v_proj_weight_quantized, x = hidden_states_967)[name = string("linear_226")]; tensor concat_608x = const()[name = string("concat_608x"), val = tensor([1, -1, 16, 128])]; tensor var_6043 = reshape(shape = concat_608x, x = linear_224)[name = string("op_6043")]; tensor q_65_perm_0 = const()[name = string("q_65_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_609x = const()[name = string("concat_609x"), val = tensor([1, -1, 2, 128])]; tensor var_6046 = reshape(shape = concat_609x, x = linear_225)[name = string("op_6046")]; tensor k_65_perm_0 = const()[name = string("k_65_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_610x = const()[name = string("concat_610x"), val = tensor([1, -1, 2, 128])]; tensor var_6049 = reshape(shape = concat_610x, x = linear_226)[name = string("op_6049")]; tensor v_state_65_perm_0 = const()[name = string("v_state_65_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_65 = transpose(perm = q_65_perm_0, x = var_6043)[name = string("transpose_15")]; tensor var_6053 = mul(x = q_65, y = cos_7)[name = string("op_6053")]; tensor x1_129_begin_0 = const()[name = string("x1_129_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_129_end_0 = const()[name = string("x1_129_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_129_end_mask_0 = const()[name = string("x1_129_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_129 = slice_by_index(begin = x1_129_begin_0, end = x1_129_end_0, end_mask = x1_129_end_mask_0, x = q_65)[name = string("x1_129")]; tensor x2_129_begin_0 = const()[name = string("x2_129_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_129_end_0 = const()[name = string("x2_129_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_129_end_mask_0 = const()[name = string("x2_129_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_129 = slice_by_index(begin = x2_129_begin_0, end = x2_129_end_0, end_mask = x2_129_end_mask_0, x = q_65)[name = string("x2_129")]; fp32 const_67_promoted = const()[name = string("const_67_promoted"), val = fp32(-0x1p+0)]; tensor var_6064 = mul(x = x2_129, y = const_67_promoted)[name = string("op_6064")]; bool var_6066_interleave_0 = const()[name = string("op_6066_interleave_0"), val = bool(false)]; tensor var_6066 = concat(axis = var_88, interleave = var_6066_interleave_0, values = (var_6064, x1_129))[name = string("op_6066")]; tensor var_6067 = mul(x = var_6066, y = sin_7)[name = string("op_6067")]; tensor query_states_131 = add(x = var_6053, y = var_6067)[name = string("query_states_131")]; tensor k_65 = transpose(perm = k_65_perm_0, x = var_6046)[name = string("transpose_14")]; tensor var_6069 = mul(x = k_65, y = cos_7)[name = string("op_6069")]; tensor x1_131_begin_0 = const()[name = string("x1_131_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_131_end_0 = const()[name = string("x1_131_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_131_end_mask_0 = const()[name = string("x1_131_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_131 = slice_by_index(begin = x1_131_begin_0, end = x1_131_end_0, end_mask = x1_131_end_mask_0, x = k_65)[name = string("x1_131")]; tensor x2_131_begin_0 = const()[name = string("x2_131_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_131_end_0 = const()[name = string("x2_131_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_131_end_mask_0 = const()[name = string("x2_131_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_131 = slice_by_index(begin = x2_131_begin_0, end = x2_131_end_0, end_mask = x2_131_end_mask_0, x = k_65)[name = string("x2_131")]; fp32 const_68_promoted = const()[name = string("const_68_promoted"), val = fp32(-0x1p+0)]; tensor var_6080 = mul(x = x2_131, y = const_68_promoted)[name = string("op_6080")]; bool var_6082_interleave_0 = const()[name = string("op_6082_interleave_0"), val = bool(false)]; tensor var_6082 = concat(axis = var_88, interleave = var_6082_interleave_0, values = (var_6080, x1_131))[name = string("op_6082")]; tensor var_6083 = mul(x = var_6082, y = sin_7)[name = string("op_6083")]; tensor k_state_65 = add(x = var_6069, y = var_6083)[name = string("k_state_65")]; tensor expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor([0])]; tensor expand_dims_385 = const()[name = string("expand_dims_385"), val = tensor([0])]; tensor expand_dims_387 = const()[name = string("expand_dims_387"), val = tensor([0])]; tensor concat_613_values0_0 = const()[name = string("concat_613_values0_0"), val = tensor([32])]; int32 concat_613_axis_0 = const()[name = string("concat_613_axis_0"), val = int32(0)]; bool concat_613_interleave_0 = const()[name = string("concat_613_interleave_0"), val = bool(false)]; tensor concat_613 = concat(axis = concat_613_axis_0, interleave = concat_613_interleave_0, values = (concat_613_values0_0, expand_dims_384, expand_dims_385, expand_dims_2, expand_dims_387))[name = string("concat_613")]; tensor key_cache_internal_tensor_assign_33_stride_0 = const()[name = string("key_cache_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_33_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_33_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_33 = slice_update(begin = concat_613, begin_mask = key_cache_internal_tensor_assign_33_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_33_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_33_squeeze_mask_0, stride = key_cache_internal_tensor_assign_33_stride_0, update = k_state_65, x = key_cache_internal_tensor_assign_32_1)[name = string("key_cache_internal_tensor_assign_33")]; string cast_461_dtype_0 = const()[name = string("cast_461_dtype_0"), val = string("fp16")]; tensor cast_461 = cast(dtype = cast_461_dtype_0, x = key_cache_internal_tensor_assign_33)[name = string("cast_16")]; write_state(data = cast_461, input = key_cache)[name = string("coreml_update_state_136_write_state")]; tensor coreml_update_state_136 = read_state(input = key_cache)[name = string("coreml_update_state_136")]; string key_cache_internal_tensor_assign_33_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_33_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_33_stride_0 = const()[name = string("value_cache_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_33_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_33_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_65 = transpose(perm = v_state_65_perm_0, x = var_6049)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_33 = slice_update(begin = concat_613, begin_mask = value_cache_internal_tensor_assign_33_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_33_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_33_squeeze_mask_0, stride = value_cache_internal_tensor_assign_33_stride_0, update = v_state_65, x = value_cache_internal_tensor_assign_32_1)[name = string("value_cache_internal_tensor_assign_33")]; string cast_462_dtype_0 = const()[name = string("cast_462_dtype_0"), val = string("fp16")]; tensor cast_462 = cast(dtype = cast_462_dtype_0, x = value_cache_internal_tensor_assign_33)[name = string("cast_15")]; write_state(data = cast_462, input = value_cache)[name = string("coreml_update_state_137_write_state")]; tensor coreml_update_state_137 = read_state(input = value_cache)[name = string("coreml_update_state_137")]; string value_cache_internal_tensor_assign_33_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_33_dtype_0"), val = string("fp32")]; tensor var_6106_begin_0 = const()[name = string("op_6106_begin_0"), val = tensor([32, 0, 0, 0, 0])]; tensor var_6106_end_0 = const()[name = string("op_6106_end_0"), val = tensor([33, 1, 2, 2048, 128])]; tensor var_6106_end_mask_0 = const()[name = string("op_6106_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6106_squeeze_mask_0 = const()[name = string("op_6106_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_33_1 = cast(dtype = key_cache_internal_tensor_assign_33_dtype_0, x = coreml_update_state_136)[name = string("cast_14")]; tensor var_6106 = slice_by_index(begin = var_6106_begin_0, end = var_6106_end_0, end_mask = var_6106_end_mask_0, squeeze_mask = var_6106_squeeze_mask_0, x = key_cache_internal_tensor_assign_33_1)[name = string("op_6106")]; tensor var_6109_begin_0 = const()[name = string("op_6109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6109_end_mask_0 = const()[name = string("op_6109_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6109 = slice_by_index(begin = var_6109_begin_0, end = concat_11, end_mask = var_6109_end_mask_0, x = var_6106)[name = string("op_6109")]; tensor var_6111_begin_0 = const()[name = string("op_6111_begin_0"), val = tensor([32, 0, 0, 0, 0])]; tensor var_6111_end_0 = const()[name = string("op_6111_end_0"), val = tensor([33, 1, 2, 2048, 128])]; tensor var_6111_end_mask_0 = const()[name = string("op_6111_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6111_squeeze_mask_0 = const()[name = string("op_6111_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_33_1 = cast(dtype = value_cache_internal_tensor_assign_33_dtype_0, x = coreml_update_state_137)[name = string("cast_13")]; tensor var_6111 = slice_by_index(begin = var_6111_begin_0, end = var_6111_end_0, end_mask = var_6111_end_mask_0, squeeze_mask = var_6111_squeeze_mask_0, x = value_cache_internal_tensor_assign_33_1)[name = string("op_6111")]; tensor var_6114_begin_0 = const()[name = string("op_6114_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6114_end_mask_0 = const()[name = string("op_6114_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6114 = slice_by_index(begin = var_6114_begin_0, end = concat_11, end_mask = var_6114_end_mask_0, x = var_6111)[name = string("op_6114")]; tensor var_6116_shape = shape(x = var_6109)[name = string("op_6116_shape")]; int32 gather_589 = const()[name = string("gather_589"), val = int32(1)]; int32 gather_590 = const()[name = string("gather_590"), val = int32(2)]; int32 select_591 = const()[name = string("select_591"), val = int32(2)]; int32 gather_591_axis_0 = const()[name = string("gather_591_axis_0"), val = int32(0)]; int32 gather_591_batch_dims_0 = const()[name = string("gather_591_batch_dims_0"), val = int32(0)]; bool gather_591_validate_indices_0 = const()[name = string("gather_591_validate_indices_0"), val = bool(false)]; int32 gather_591 = gather(axis = gather_591_axis_0, batch_dims = gather_591_batch_dims_0, indices = select_591, validate_indices = gather_591_validate_indices_0, x = var_6116_shape)[name = string("gather_591")]; int32 gather_592 = const()[name = string("gather_592"), val = int32(128)]; tensor var_6123_axes_0 = const()[name = string("op_6123_axes_0"), val = tensor([2])]; tensor var_6123 = expand_dims(axes = var_6123_axes_0, x = var_6109)[name = string("op_6123")]; tensor shape_657 = shape(x = var_6123)[name = string("shape_657")]; int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)]; bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)]; tensor concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (gather_589, gather_590, var_100, gather_591, gather_592))[name = string("concat_621")]; tensor real_div_64 = real_div(x = concat_621, y = shape_657)[name = string("real_div_64")]; tensor hidden_states_971 = tile(reps = real_div_64, x = var_6123)[name = string("hidden_states_971")]; tensor concat_622x = const()[name = string("concat_622x"), val = tensor([1, 16, -1, 128])]; tensor key_states_131 = reshape(shape = concat_622x, x = hidden_states_971)[name = string("key_states_131")]; tensor var_6133_shape = shape(x = var_6114)[name = string("op_6133_shape")]; int32 gather_593 = const()[name = string("gather_593"), val = int32(1)]; int32 gather_594 = const()[name = string("gather_594"), val = int32(2)]; int32 select_595 = const()[name = string("select_595"), val = int32(2)]; int32 gather_595_axis_0 = const()[name = string("gather_595_axis_0"), val = int32(0)]; int32 gather_595_batch_dims_0 = const()[name = string("gather_595_batch_dims_0"), val = int32(0)]; bool gather_595_validate_indices_0 = const()[name = string("gather_595_validate_indices_0"), val = bool(false)]; int32 gather_595 = gather(axis = gather_595_axis_0, batch_dims = gather_595_batch_dims_0, indices = select_595, validate_indices = gather_595_validate_indices_0, x = var_6133_shape)[name = string("gather_595")]; int32 gather_596 = const()[name = string("gather_596"), val = int32(128)]; tensor var_6140_axes_0 = const()[name = string("op_6140_axes_0"), val = tensor([2])]; tensor var_6140 = expand_dims(axes = var_6140_axes_0, x = var_6114)[name = string("op_6140")]; tensor shape_662 = shape(x = var_6140)[name = string("shape_662")]; int32 concat_623_axis_0 = const()[name = string("concat_623_axis_0"), val = int32(0)]; bool concat_623_interleave_0 = const()[name = string("concat_623_interleave_0"), val = bool(false)]; tensor concat_623 = concat(axis = concat_623_axis_0, interleave = concat_623_interleave_0, values = (gather_593, gather_594, var_100, gather_595, gather_596))[name = string("concat_623")]; tensor real_div_65 = real_div(x = concat_623, y = shape_662)[name = string("real_div_65")]; tensor hidden_states_975 = tile(reps = real_div_65, x = var_6140)[name = string("hidden_states_975")]; tensor concat_624x = const()[name = string("concat_624x"), val = tensor([1, 16, -1, 128])]; tensor value_states_131 = reshape(shape = concat_624x, x = hidden_states_975)[name = string("value_states_131")]; tensor var_6150_shape = shape(x = key_states_131)[name = string("op_6150_shape")]; int32 select_597 = const()[name = string("select_597"), val = int32(2)]; int32 gather_597_axis_0 = const()[name = string("gather_597_axis_0"), val = int32(0)]; int32 gather_597_batch_dims_0 = const()[name = string("gather_597_batch_dims_0"), val = int32(0)]; bool gather_597_validate_indices_0 = const()[name = string("gather_597_validate_indices_0"), val = bool(false)]; int32 gather_597 = gather(axis = gather_597_axis_0, batch_dims = gather_597_batch_dims_0, indices = select_597, validate_indices = gather_597_validate_indices_0, x = var_6150_shape)[name = string("gather_597")]; int32 concat_625_values0_0 = const()[name = string("concat_625_values0_0"), val = int32(1)]; int32 concat_625_values1_0 = const()[name = string("concat_625_values1_0"), val = int32(1)]; int32 concat_625_values2_0 = const()[name = string("concat_625_values2_0"), val = int32(0)]; int32 concat_625_axis_0 = const()[name = string("concat_625_axis_0"), val = int32(0)]; bool concat_625_interleave_0 = const()[name = string("concat_625_interleave_0"), val = bool(false)]; tensor concat_625 = concat(axis = concat_625_axis_0, interleave = concat_625_interleave_0, values = (concat_625_values0_0, concat_625_values1_0, concat_625_values2_0, gather_597))[name = string("concat_625")]; tensor causal_mask_67_begin_0 = const()[name = string("causal_mask_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_67_end_mask_0 = const()[name = string("causal_mask_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_67 = slice_by_index(begin = causal_mask_67_begin_0, end = concat_625, end_mask = causal_mask_67_end_mask_0, x = cast_0)[name = string("causal_mask_67")]; tensor attn_output_129 = scaled_dot_product_attention(attn_mask = causal_mask_67, key = key_states_131, query = query_states_131, value = value_states_131)[name = string("attn_output_129")]; tensor var_6156_perm_0 = const()[name = string("op_6156_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_626x = const()[name = string("concat_626x"), val = tensor([1, -1, 2048])]; tensor var_6156 = transpose(perm = var_6156_perm_0, x = attn_output_129)[name = string("transpose_12")]; tensor input_257 = reshape(shape = concat_626x, x = var_6156)[name = string("input_257")]; tensor linear_227 = linear(bias = linear_3_bias_0, weight = model_model_layers_32_self_attn_o_proj_weight_quantized, x = input_257)[name = string("linear_227")]; tensor hidden_states_979 = add(x = hidden_states_959, y = linear_227)[name = string("hidden_states_979")]; fp32 var_94_promoted_65 = const()[name = string("op_94_promoted_65"), val = fp32(0x1p+1)]; tensor var_6165 = pow(x = hidden_states_979, y = var_94_promoted_65)[name = string("op_6165")]; tensor variance_131_axes_0 = const()[name = string("variance_131_axes_0"), val = tensor([-1])]; bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; tensor variance_131 = reduce_mean(axes = variance_131_axes_0, keep_dims = variance_131_keep_dims_0, x = var_6165)[name = string("variance_131")]; fp32 var_6168 = const()[name = string("op_6168"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6169 = add(x = variance_131, y = var_6168)[name = string("op_6169")]; fp32 var_6170_epsilon_0 = const()[name = string("op_6170_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6170 = rsqrt(epsilon = var_6170_epsilon_0, x = var_6169)[name = string("op_6170")]; tensor hidden_states_983 = mul(x = hidden_states_979, y = var_6170)[name = string("hidden_states_983")]; tensor input_259 = mul(x = model_model_layers_32_post_attention_layernorm_weight, y = hidden_states_983)[name = string("input_259")]; tensor linear_228 = linear(bias = linear_4_bias_0, weight = model_model_layers_32_mlp_gate_proj_weight_quantized, x = input_259)[name = string("linear_228")]; tensor var_6182 = silu(x = linear_228)[name = string("op_6182")]; tensor linear_229 = linear(bias = linear_4_bias_0, weight = model_model_layers_32_mlp_up_proj_weight_quantized, x = input_259)[name = string("linear_229")]; tensor input_263 = mul(x = var_6182, y = linear_229)[name = string("input_263")]; tensor linear_230 = linear(bias = linear_3_bias_0, weight = model_model_layers_32_mlp_down_proj_weight_quantized, x = input_263)[name = string("linear_230")]; tensor hidden_states_989 = add(x = hidden_states_979, y = linear_230)[name = string("hidden_states_989")]; fp32 var_94_promoted_66 = const()[name = string("op_94_promoted_66"), val = fp32(0x1p+1)]; tensor var_6195 = pow(x = hidden_states_989, y = var_94_promoted_66)[name = string("op_6195")]; tensor variance_133_axes_0 = const()[name = string("variance_133_axes_0"), val = tensor([-1])]; bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; tensor variance_133 = reduce_mean(axes = variance_133_axes_0, keep_dims = variance_133_keep_dims_0, x = var_6195)[name = string("variance_133")]; fp32 var_6198 = const()[name = string("op_6198"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6199 = add(x = variance_133, y = var_6198)[name = string("op_6199")]; fp32 var_6200_epsilon_0 = const()[name = string("op_6200_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6200 = rsqrt(epsilon = var_6200_epsilon_0, x = var_6199)[name = string("op_6200")]; tensor hidden_states_993 = mul(x = hidden_states_989, y = var_6200)[name = string("hidden_states_993")]; tensor hidden_states_997 = mul(x = model_model_layers_33_input_layernorm_weight, y = hidden_states_993)[name = string("hidden_states_997")]; tensor linear_231 = linear(bias = model_model_layers_33_self_attn_q_proj_bias, weight = model_model_layers_33_self_attn_q_proj_weight_quantized, x = hidden_states_997)[name = string("linear_231")]; tensor linear_232 = linear(bias = model_model_layers_33_self_attn_k_proj_bias, weight = model_model_layers_33_self_attn_k_proj_weight_quantized, x = hidden_states_997)[name = string("linear_232")]; tensor linear_233 = linear(bias = model_model_layers_33_self_attn_v_proj_bias, weight = model_model_layers_33_self_attn_v_proj_weight_quantized, x = hidden_states_997)[name = string("linear_233")]; tensor concat_627x = const()[name = string("concat_627x"), val = tensor([1, -1, 16, 128])]; tensor var_6223 = reshape(shape = concat_627x, x = linear_231)[name = string("op_6223")]; tensor q_67_perm_0 = const()[name = string("q_67_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_628x = const()[name = string("concat_628x"), val = tensor([1, -1, 2, 128])]; tensor var_6226 = reshape(shape = concat_628x, x = linear_232)[name = string("op_6226")]; tensor k_67_perm_0 = const()[name = string("k_67_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_629x = const()[name = string("concat_629x"), val = tensor([1, -1, 2, 128])]; tensor var_6229 = reshape(shape = concat_629x, x = linear_233)[name = string("op_6229")]; tensor v_state_67_perm_0 = const()[name = string("v_state_67_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_67 = transpose(perm = q_67_perm_0, x = var_6223)[name = string("transpose_11")]; tensor var_6233 = mul(x = q_67, y = cos_7)[name = string("op_6233")]; tensor x1_133_begin_0 = const()[name = string("x1_133_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_133_end_0 = const()[name = string("x1_133_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_133_end_mask_0 = const()[name = string("x1_133_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_133 = slice_by_index(begin = x1_133_begin_0, end = x1_133_end_0, end_mask = x1_133_end_mask_0, x = q_67)[name = string("x1_133")]; tensor x2_133_begin_0 = const()[name = string("x2_133_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_133_end_0 = const()[name = string("x2_133_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_133_end_mask_0 = const()[name = string("x2_133_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_133 = slice_by_index(begin = x2_133_begin_0, end = x2_133_end_0, end_mask = x2_133_end_mask_0, x = q_67)[name = string("x2_133")]; fp32 const_69_promoted = const()[name = string("const_69_promoted"), val = fp32(-0x1p+0)]; tensor var_6244 = mul(x = x2_133, y = const_69_promoted)[name = string("op_6244")]; bool var_6246_interleave_0 = const()[name = string("op_6246_interleave_0"), val = bool(false)]; tensor var_6246 = concat(axis = var_88, interleave = var_6246_interleave_0, values = (var_6244, x1_133))[name = string("op_6246")]; tensor var_6247 = mul(x = var_6246, y = sin_7)[name = string("op_6247")]; tensor query_states_135 = add(x = var_6233, y = var_6247)[name = string("query_states_135")]; tensor k_67 = transpose(perm = k_67_perm_0, x = var_6226)[name = string("transpose_10")]; tensor var_6249 = mul(x = k_67, y = cos_7)[name = string("op_6249")]; tensor x1_135_begin_0 = const()[name = string("x1_135_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_135_end_0 = const()[name = string("x1_135_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_135_end_mask_0 = const()[name = string("x1_135_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_135 = slice_by_index(begin = x1_135_begin_0, end = x1_135_end_0, end_mask = x1_135_end_mask_0, x = k_67)[name = string("x1_135")]; tensor x2_135_begin_0 = const()[name = string("x2_135_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_135_end_0 = const()[name = string("x2_135_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_135_end_mask_0 = const()[name = string("x2_135_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_135 = slice_by_index(begin = x2_135_begin_0, end = x2_135_end_0, end_mask = x2_135_end_mask_0, x = k_67)[name = string("x2_135")]; fp32 const_70_promoted = const()[name = string("const_70_promoted"), val = fp32(-0x1p+0)]; tensor var_6260 = mul(x = x2_135, y = const_70_promoted)[name = string("op_6260")]; bool var_6262_interleave_0 = const()[name = string("op_6262_interleave_0"), val = bool(false)]; tensor var_6262 = concat(axis = var_88, interleave = var_6262_interleave_0, values = (var_6260, x1_135))[name = string("op_6262")]; tensor var_6263 = mul(x = var_6262, y = sin_7)[name = string("op_6263")]; tensor k_state_67 = add(x = var_6249, y = var_6263)[name = string("k_state_67")]; tensor expand_dims_396 = const()[name = string("expand_dims_396"), val = tensor([0])]; tensor expand_dims_397 = const()[name = string("expand_dims_397"), val = tensor([0])]; tensor expand_dims_399 = const()[name = string("expand_dims_399"), val = tensor([0])]; tensor concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = tensor([33])]; int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)]; bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)]; tensor concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, expand_dims_396, expand_dims_397, expand_dims_2, expand_dims_399))[name = string("concat_632")]; tensor key_cache_internal_tensor_assign_34_stride_0 = const()[name = string("key_cache_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_34_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_34_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_34 = slice_update(begin = concat_632, begin_mask = key_cache_internal_tensor_assign_34_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_34_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_34_squeeze_mask_0, stride = key_cache_internal_tensor_assign_34_stride_0, update = k_state_67, x = key_cache_internal_tensor_assign_33_1)[name = string("key_cache_internal_tensor_assign_34")]; string cast_475_dtype_0 = const()[name = string("cast_475_dtype_0"), val = string("fp16")]; tensor cast_475 = cast(dtype = cast_475_dtype_0, x = key_cache_internal_tensor_assign_34)[name = string("cast_12")]; write_state(data = cast_475, input = key_cache)[name = string("coreml_update_state_138_write_state")]; tensor coreml_update_state_138 = read_state(input = key_cache)[name = string("coreml_update_state_138")]; string key_cache_internal_tensor_assign_34_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_34_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_34_stride_0 = const()[name = string("value_cache_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_34_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_34_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_67 = transpose(perm = v_state_67_perm_0, x = var_6229)[name = string("transpose_9")]; tensor value_cache_internal_tensor_assign_34 = slice_update(begin = concat_632, begin_mask = value_cache_internal_tensor_assign_34_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_34_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_34_squeeze_mask_0, stride = value_cache_internal_tensor_assign_34_stride_0, update = v_state_67, x = value_cache_internal_tensor_assign_33_1)[name = string("value_cache_internal_tensor_assign_34")]; string cast_476_dtype_0 = const()[name = string("cast_476_dtype_0"), val = string("fp16")]; tensor cast_476 = cast(dtype = cast_476_dtype_0, x = value_cache_internal_tensor_assign_34)[name = string("cast_11")]; write_state(data = cast_476, input = value_cache)[name = string("coreml_update_state_139_write_state")]; tensor coreml_update_state_139 = read_state(input = value_cache)[name = string("coreml_update_state_139")]; string value_cache_internal_tensor_assign_34_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_34_dtype_0"), val = string("fp32")]; tensor var_6286_begin_0 = const()[name = string("op_6286_begin_0"), val = tensor([33, 0, 0, 0, 0])]; tensor var_6286_end_0 = const()[name = string("op_6286_end_0"), val = tensor([34, 1, 2, 2048, 128])]; tensor var_6286_end_mask_0 = const()[name = string("op_6286_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6286_squeeze_mask_0 = const()[name = string("op_6286_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_34_1 = cast(dtype = key_cache_internal_tensor_assign_34_dtype_0, x = coreml_update_state_138)[name = string("cast_10")]; tensor var_6286 = slice_by_index(begin = var_6286_begin_0, end = var_6286_end_0, end_mask = var_6286_end_mask_0, squeeze_mask = var_6286_squeeze_mask_0, x = key_cache_internal_tensor_assign_34_1)[name = string("op_6286")]; tensor var_6289_begin_0 = const()[name = string("op_6289_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6289_end_mask_0 = const()[name = string("op_6289_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6289 = slice_by_index(begin = var_6289_begin_0, end = concat_11, end_mask = var_6289_end_mask_0, x = var_6286)[name = string("op_6289")]; tensor var_6291_begin_0 = const()[name = string("op_6291_begin_0"), val = tensor([33, 0, 0, 0, 0])]; tensor var_6291_end_0 = const()[name = string("op_6291_end_0"), val = tensor([34, 1, 2, 2048, 128])]; tensor var_6291_end_mask_0 = const()[name = string("op_6291_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6291_squeeze_mask_0 = const()[name = string("op_6291_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_34_1 = cast(dtype = value_cache_internal_tensor_assign_34_dtype_0, x = coreml_update_state_139)[name = string("cast_9")]; tensor var_6291 = slice_by_index(begin = var_6291_begin_0, end = var_6291_end_0, end_mask = var_6291_end_mask_0, squeeze_mask = var_6291_squeeze_mask_0, x = value_cache_internal_tensor_assign_34_1)[name = string("op_6291")]; tensor var_6294_begin_0 = const()[name = string("op_6294_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6294_end_mask_0 = const()[name = string("op_6294_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6294 = slice_by_index(begin = var_6294_begin_0, end = concat_11, end_mask = var_6294_end_mask_0, x = var_6291)[name = string("op_6294")]; tensor var_6296_shape = shape(x = var_6289)[name = string("op_6296_shape")]; int32 gather_607 = const()[name = string("gather_607"), val = int32(1)]; int32 gather_608 = const()[name = string("gather_608"), val = int32(2)]; int32 select_609 = const()[name = string("select_609"), val = int32(2)]; int32 gather_609_axis_0 = const()[name = string("gather_609_axis_0"), val = int32(0)]; int32 gather_609_batch_dims_0 = const()[name = string("gather_609_batch_dims_0"), val = int32(0)]; bool gather_609_validate_indices_0 = const()[name = string("gather_609_validate_indices_0"), val = bool(false)]; int32 gather_609 = gather(axis = gather_609_axis_0, batch_dims = gather_609_batch_dims_0, indices = select_609, validate_indices = gather_609_validate_indices_0, x = var_6296_shape)[name = string("gather_609")]; int32 gather_610 = const()[name = string("gather_610"), val = int32(128)]; tensor var_6303_axes_0 = const()[name = string("op_6303_axes_0"), val = tensor([2])]; tensor var_6303 = expand_dims(axes = var_6303_axes_0, x = var_6289)[name = string("op_6303")]; tensor shape_677 = shape(x = var_6303)[name = string("shape_677")]; int32 concat_640_axis_0 = const()[name = string("concat_640_axis_0"), val = int32(0)]; bool concat_640_interleave_0 = const()[name = string("concat_640_interleave_0"), val = bool(false)]; tensor concat_640 = concat(axis = concat_640_axis_0, interleave = concat_640_interleave_0, values = (gather_607, gather_608, var_100, gather_609, gather_610))[name = string("concat_640")]; tensor real_div_66 = real_div(x = concat_640, y = shape_677)[name = string("real_div_66")]; tensor hidden_states_1001 = tile(reps = real_div_66, x = var_6303)[name = string("hidden_states_1001")]; tensor concat_641x = const()[name = string("concat_641x"), val = tensor([1, 16, -1, 128])]; tensor key_states_135 = reshape(shape = concat_641x, x = hidden_states_1001)[name = string("key_states_135")]; tensor var_6313_shape = shape(x = var_6294)[name = string("op_6313_shape")]; int32 gather_611 = const()[name = string("gather_611"), val = int32(1)]; int32 gather_612 = const()[name = string("gather_612"), val = int32(2)]; int32 select_613 = const()[name = string("select_613"), val = int32(2)]; int32 gather_613_axis_0 = const()[name = string("gather_613_axis_0"), val = int32(0)]; int32 gather_613_batch_dims_0 = const()[name = string("gather_613_batch_dims_0"), val = int32(0)]; bool gather_613_validate_indices_0 = const()[name = string("gather_613_validate_indices_0"), val = bool(false)]; int32 gather_613 = gather(axis = gather_613_axis_0, batch_dims = gather_613_batch_dims_0, indices = select_613, validate_indices = gather_613_validate_indices_0, x = var_6313_shape)[name = string("gather_613")]; int32 gather_614 = const()[name = string("gather_614"), val = int32(128)]; tensor var_6320_axes_0 = const()[name = string("op_6320_axes_0"), val = tensor([2])]; tensor var_6320 = expand_dims(axes = var_6320_axes_0, x = var_6294)[name = string("op_6320")]; tensor shape_682 = shape(x = var_6320)[name = string("shape_682")]; int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)]; bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)]; tensor concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (gather_611, gather_612, var_100, gather_613, gather_614))[name = string("concat_642")]; tensor real_div_67 = real_div(x = concat_642, y = shape_682)[name = string("real_div_67")]; tensor hidden_states_1005 = tile(reps = real_div_67, x = var_6320)[name = string("hidden_states_1005")]; tensor concat_643x = const()[name = string("concat_643x"), val = tensor([1, 16, -1, 128])]; tensor value_states_135 = reshape(shape = concat_643x, x = hidden_states_1005)[name = string("value_states_135")]; tensor var_6330_shape = shape(x = key_states_135)[name = string("op_6330_shape")]; int32 select_615 = const()[name = string("select_615"), val = int32(2)]; int32 gather_615_axis_0 = const()[name = string("gather_615_axis_0"), val = int32(0)]; int32 gather_615_batch_dims_0 = const()[name = string("gather_615_batch_dims_0"), val = int32(0)]; bool gather_615_validate_indices_0 = const()[name = string("gather_615_validate_indices_0"), val = bool(false)]; int32 gather_615 = gather(axis = gather_615_axis_0, batch_dims = gather_615_batch_dims_0, indices = select_615, validate_indices = gather_615_validate_indices_0, x = var_6330_shape)[name = string("gather_615")]; int32 concat_644_values0_0 = const()[name = string("concat_644_values0_0"), val = int32(1)]; int32 concat_644_values1_0 = const()[name = string("concat_644_values1_0"), val = int32(1)]; int32 concat_644_values2_0 = const()[name = string("concat_644_values2_0"), val = int32(0)]; int32 concat_644_axis_0 = const()[name = string("concat_644_axis_0"), val = int32(0)]; bool concat_644_interleave_0 = const()[name = string("concat_644_interleave_0"), val = bool(false)]; tensor concat_644 = concat(axis = concat_644_axis_0, interleave = concat_644_interleave_0, values = (concat_644_values0_0, concat_644_values1_0, concat_644_values2_0, gather_615))[name = string("concat_644")]; tensor causal_mask_69_begin_0 = const()[name = string("causal_mask_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_69_end_mask_0 = const()[name = string("causal_mask_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_69 = slice_by_index(begin = causal_mask_69_begin_0, end = concat_644, end_mask = causal_mask_69_end_mask_0, x = cast_0)[name = string("causal_mask_69")]; tensor attn_output_133 = scaled_dot_product_attention(attn_mask = causal_mask_69, key = key_states_135, query = query_states_135, value = value_states_135)[name = string("attn_output_133")]; tensor var_6336_perm_0 = const()[name = string("op_6336_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_645x = const()[name = string("concat_645x"), val = tensor([1, -1, 2048])]; tensor var_6336 = transpose(perm = var_6336_perm_0, x = attn_output_133)[name = string("transpose_8")]; tensor input_265 = reshape(shape = concat_645x, x = var_6336)[name = string("input_265")]; tensor linear_234 = linear(bias = linear_3_bias_0, weight = model_model_layers_33_self_attn_o_proj_weight_quantized, x = input_265)[name = string("linear_234")]; tensor hidden_states_1009 = add(x = hidden_states_989, y = linear_234)[name = string("hidden_states_1009")]; fp32 var_94_promoted_67 = const()[name = string("op_94_promoted_67"), val = fp32(0x1p+1)]; tensor var_6345 = pow(x = hidden_states_1009, y = var_94_promoted_67)[name = string("op_6345")]; tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([-1])]; bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; tensor variance_135 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = var_6345)[name = string("variance_135")]; fp32 var_6348 = const()[name = string("op_6348"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6349 = add(x = variance_135, y = var_6348)[name = string("op_6349")]; fp32 var_6350_epsilon_0 = const()[name = string("op_6350_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6350 = rsqrt(epsilon = var_6350_epsilon_0, x = var_6349)[name = string("op_6350")]; tensor hidden_states_1013 = mul(x = hidden_states_1009, y = var_6350)[name = string("hidden_states_1013")]; tensor input_267 = mul(x = model_model_layers_33_post_attention_layernorm_weight, y = hidden_states_1013)[name = string("input_267")]; tensor linear_235 = linear(bias = linear_4_bias_0, weight = model_model_layers_33_mlp_gate_proj_weight_quantized, x = input_267)[name = string("linear_235")]; tensor var_6362 = silu(x = linear_235)[name = string("op_6362")]; tensor linear_236 = linear(bias = linear_4_bias_0, weight = model_model_layers_33_mlp_up_proj_weight_quantized, x = input_267)[name = string("linear_236")]; tensor input_271 = mul(x = var_6362, y = linear_236)[name = string("input_271")]; tensor linear_237 = linear(bias = linear_3_bias_0, weight = model_model_layers_33_mlp_down_proj_weight_quantized, x = input_271)[name = string("linear_237")]; tensor hidden_states_1019 = add(x = hidden_states_1009, y = linear_237)[name = string("hidden_states_1019")]; fp32 var_94_promoted_68 = const()[name = string("op_94_promoted_68"), val = fp32(0x1p+1)]; tensor var_6375 = pow(x = hidden_states_1019, y = var_94_promoted_68)[name = string("op_6375")]; tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([-1])]; bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; tensor variance_137 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = var_6375)[name = string("variance_137")]; fp32 var_6378 = const()[name = string("op_6378"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6379 = add(x = variance_137, y = var_6378)[name = string("op_6379")]; fp32 var_6380_epsilon_0 = const()[name = string("op_6380_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6380 = rsqrt(epsilon = var_6380_epsilon_0, x = var_6379)[name = string("op_6380")]; tensor hidden_states_1023 = mul(x = hidden_states_1019, y = var_6380)[name = string("hidden_states_1023")]; tensor hidden_states_1027 = mul(x = model_model_layers_34_input_layernorm_weight, y = hidden_states_1023)[name = string("hidden_states_1027")]; tensor linear_238 = linear(bias = model_model_layers_34_self_attn_q_proj_bias, weight = model_model_layers_34_self_attn_q_proj_weight_quantized, x = hidden_states_1027)[name = string("linear_238")]; tensor linear_239 = linear(bias = model_model_layers_34_self_attn_k_proj_bias, weight = model_model_layers_34_self_attn_k_proj_weight_quantized, x = hidden_states_1027)[name = string("linear_239")]; tensor linear_240 = linear(bias = model_model_layers_34_self_attn_v_proj_bias, weight = model_model_layers_34_self_attn_v_proj_weight_quantized, x = hidden_states_1027)[name = string("linear_240")]; tensor concat_646x = const()[name = string("concat_646x"), val = tensor([1, -1, 16, 128])]; tensor var_6403 = reshape(shape = concat_646x, x = linear_238)[name = string("op_6403")]; tensor q_69_perm_0 = const()[name = string("q_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_647x = const()[name = string("concat_647x"), val = tensor([1, -1, 2, 128])]; tensor var_6406 = reshape(shape = concat_647x, x = linear_239)[name = string("op_6406")]; tensor k_69_perm_0 = const()[name = string("k_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_648x = const()[name = string("concat_648x"), val = tensor([1, -1, 2, 128])]; tensor var_6409 = reshape(shape = concat_648x, x = linear_240)[name = string("op_6409")]; tensor v_state_69_perm_0 = const()[name = string("v_state_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_69 = transpose(perm = q_69_perm_0, x = var_6403)[name = string("transpose_7")]; tensor var_6413 = mul(x = q_69, y = cos_7)[name = string("op_6413")]; tensor x1_137_begin_0 = const()[name = string("x1_137_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_137_end_0 = const()[name = string("x1_137_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_137_end_mask_0 = const()[name = string("x1_137_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_137 = slice_by_index(begin = x1_137_begin_0, end = x1_137_end_0, end_mask = x1_137_end_mask_0, x = q_69)[name = string("x1_137")]; tensor x2_137_begin_0 = const()[name = string("x2_137_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_137_end_0 = const()[name = string("x2_137_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_137_end_mask_0 = const()[name = string("x2_137_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_137 = slice_by_index(begin = x2_137_begin_0, end = x2_137_end_0, end_mask = x2_137_end_mask_0, x = q_69)[name = string("x2_137")]; fp32 const_71_promoted = const()[name = string("const_71_promoted"), val = fp32(-0x1p+0)]; tensor var_6424 = mul(x = x2_137, y = const_71_promoted)[name = string("op_6424")]; bool var_6426_interleave_0 = const()[name = string("op_6426_interleave_0"), val = bool(false)]; tensor var_6426 = concat(axis = var_88, interleave = var_6426_interleave_0, values = (var_6424, x1_137))[name = string("op_6426")]; tensor var_6427 = mul(x = var_6426, y = sin_7)[name = string("op_6427")]; tensor query_states_139 = add(x = var_6413, y = var_6427)[name = string("query_states_139")]; tensor k_69 = transpose(perm = k_69_perm_0, x = var_6406)[name = string("transpose_6")]; tensor var_6429 = mul(x = k_69, y = cos_7)[name = string("op_6429")]; tensor x1_139_begin_0 = const()[name = string("x1_139_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_139_end_0 = const()[name = string("x1_139_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_139_end_mask_0 = const()[name = string("x1_139_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_139 = slice_by_index(begin = x1_139_begin_0, end = x1_139_end_0, end_mask = x1_139_end_mask_0, x = k_69)[name = string("x1_139")]; tensor x2_139_begin_0 = const()[name = string("x2_139_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_139_end_0 = const()[name = string("x2_139_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_139_end_mask_0 = const()[name = string("x2_139_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_139 = slice_by_index(begin = x2_139_begin_0, end = x2_139_end_0, end_mask = x2_139_end_mask_0, x = k_69)[name = string("x2_139")]; fp32 const_72_promoted = const()[name = string("const_72_promoted"), val = fp32(-0x1p+0)]; tensor var_6440 = mul(x = x2_139, y = const_72_promoted)[name = string("op_6440")]; bool var_6442_interleave_0 = const()[name = string("op_6442_interleave_0"), val = bool(false)]; tensor var_6442 = concat(axis = var_88, interleave = var_6442_interleave_0, values = (var_6440, x1_139))[name = string("op_6442")]; tensor var_6443 = mul(x = var_6442, y = sin_7)[name = string("op_6443")]; tensor k_state_69 = add(x = var_6429, y = var_6443)[name = string("k_state_69")]; tensor expand_dims_408 = const()[name = string("expand_dims_408"), val = tensor([0])]; tensor expand_dims_409 = const()[name = string("expand_dims_409"), val = tensor([0])]; tensor expand_dims_411 = const()[name = string("expand_dims_411"), val = tensor([0])]; tensor concat_651_values0_0 = const()[name = string("concat_651_values0_0"), val = tensor([34])]; int32 concat_651_axis_0 = const()[name = string("concat_651_axis_0"), val = int32(0)]; bool concat_651_interleave_0 = const()[name = string("concat_651_interleave_0"), val = bool(false)]; tensor concat_651 = concat(axis = concat_651_axis_0, interleave = concat_651_interleave_0, values = (concat_651_values0_0, expand_dims_408, expand_dims_409, expand_dims_2, expand_dims_411))[name = string("concat_651")]; tensor key_cache_internal_tensor_assign_35_stride_0 = const()[name = string("key_cache_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_35_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_35_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_35 = slice_update(begin = concat_651, begin_mask = key_cache_internal_tensor_assign_35_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_35_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_35_squeeze_mask_0, stride = key_cache_internal_tensor_assign_35_stride_0, update = k_state_69, x = key_cache_internal_tensor_assign_34_1)[name = string("key_cache_internal_tensor_assign_35")]; string cast_489_dtype_0 = const()[name = string("cast_489_dtype_0"), val = string("fp16")]; tensor cast_489 = cast(dtype = cast_489_dtype_0, x = key_cache_internal_tensor_assign_35)[name = string("cast_8")]; write_state(data = cast_489, input = key_cache)[name = string("coreml_update_state_140_write_state")]; tensor coreml_update_state_140 = read_state(input = key_cache)[name = string("coreml_update_state_140")]; string key_cache_internal_tensor_assign_35_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_35_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_35_stride_0 = const()[name = string("value_cache_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_35_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_35_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_69 = transpose(perm = v_state_69_perm_0, x = var_6409)[name = string("transpose_5")]; tensor value_cache_internal_tensor_assign_35 = slice_update(begin = concat_651, begin_mask = value_cache_internal_tensor_assign_35_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_35_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_35_squeeze_mask_0, stride = value_cache_internal_tensor_assign_35_stride_0, update = v_state_69, x = value_cache_internal_tensor_assign_34_1)[name = string("value_cache_internal_tensor_assign_35")]; string cast_490_dtype_0 = const()[name = string("cast_490_dtype_0"), val = string("fp16")]; tensor cast_490 = cast(dtype = cast_490_dtype_0, x = value_cache_internal_tensor_assign_35)[name = string("cast_7")]; write_state(data = cast_490, input = value_cache)[name = string("coreml_update_state_141_write_state")]; tensor coreml_update_state_141 = read_state(input = value_cache)[name = string("coreml_update_state_141")]; string value_cache_internal_tensor_assign_35_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_35_dtype_0"), val = string("fp32")]; tensor var_6466_begin_0 = const()[name = string("op_6466_begin_0"), val = tensor([34, 0, 0, 0, 0])]; tensor var_6466_end_0 = const()[name = string("op_6466_end_0"), val = tensor([35, 1, 2, 2048, 128])]; tensor var_6466_end_mask_0 = const()[name = string("op_6466_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6466_squeeze_mask_0 = const()[name = string("op_6466_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_35_1 = cast(dtype = key_cache_internal_tensor_assign_35_dtype_0, x = coreml_update_state_140)[name = string("cast_6")]; tensor var_6466 = slice_by_index(begin = var_6466_begin_0, end = var_6466_end_0, end_mask = var_6466_end_mask_0, squeeze_mask = var_6466_squeeze_mask_0, x = key_cache_internal_tensor_assign_35_1)[name = string("op_6466")]; tensor var_6469_begin_0 = const()[name = string("op_6469_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6469_end_mask_0 = const()[name = string("op_6469_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6469 = slice_by_index(begin = var_6469_begin_0, end = concat_11, end_mask = var_6469_end_mask_0, x = var_6466)[name = string("op_6469")]; tensor var_6471_begin_0 = const()[name = string("op_6471_begin_0"), val = tensor([34, 0, 0, 0, 0])]; tensor var_6471_end_0 = const()[name = string("op_6471_end_0"), val = tensor([35, 1, 2, 2048, 128])]; tensor var_6471_end_mask_0 = const()[name = string("op_6471_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6471_squeeze_mask_0 = const()[name = string("op_6471_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_35_1 = cast(dtype = value_cache_internal_tensor_assign_35_dtype_0, x = coreml_update_state_141)[name = string("cast_5")]; tensor var_6471 = slice_by_index(begin = var_6471_begin_0, end = var_6471_end_0, end_mask = var_6471_end_mask_0, squeeze_mask = var_6471_squeeze_mask_0, x = value_cache_internal_tensor_assign_35_1)[name = string("op_6471")]; tensor var_6474_begin_0 = const()[name = string("op_6474_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6474_end_mask_0 = const()[name = string("op_6474_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6474 = slice_by_index(begin = var_6474_begin_0, end = concat_11, end_mask = var_6474_end_mask_0, x = var_6471)[name = string("op_6474")]; tensor var_6476_shape = shape(x = var_6469)[name = string("op_6476_shape")]; int32 gather_625 = const()[name = string("gather_625"), val = int32(1)]; int32 gather_626 = const()[name = string("gather_626"), val = int32(2)]; int32 select_627 = const()[name = string("select_627"), val = int32(2)]; int32 gather_627_axis_0 = const()[name = string("gather_627_axis_0"), val = int32(0)]; int32 gather_627_batch_dims_0 = const()[name = string("gather_627_batch_dims_0"), val = int32(0)]; bool gather_627_validate_indices_0 = const()[name = string("gather_627_validate_indices_0"), val = bool(false)]; int32 gather_627 = gather(axis = gather_627_axis_0, batch_dims = gather_627_batch_dims_0, indices = select_627, validate_indices = gather_627_validate_indices_0, x = var_6476_shape)[name = string("gather_627")]; int32 gather_628 = const()[name = string("gather_628"), val = int32(128)]; tensor var_6483_axes_0 = const()[name = string("op_6483_axes_0"), val = tensor([2])]; tensor var_6483 = expand_dims(axes = var_6483_axes_0, x = var_6469)[name = string("op_6483")]; tensor shape_697 = shape(x = var_6483)[name = string("shape_697")]; int32 concat_659_axis_0 = const()[name = string("concat_659_axis_0"), val = int32(0)]; bool concat_659_interleave_0 = const()[name = string("concat_659_interleave_0"), val = bool(false)]; tensor concat_659 = concat(axis = concat_659_axis_0, interleave = concat_659_interleave_0, values = (gather_625, gather_626, var_100, gather_627, gather_628))[name = string("concat_659")]; tensor real_div_68 = real_div(x = concat_659, y = shape_697)[name = string("real_div_68")]; tensor hidden_states_1031 = tile(reps = real_div_68, x = var_6483)[name = string("hidden_states_1031")]; tensor concat_660x = const()[name = string("concat_660x"), val = tensor([1, 16, -1, 128])]; tensor key_states_139 = reshape(shape = concat_660x, x = hidden_states_1031)[name = string("key_states_139")]; tensor var_6493_shape = shape(x = var_6474)[name = string("op_6493_shape")]; int32 gather_629 = const()[name = string("gather_629"), val = int32(1)]; int32 gather_630 = const()[name = string("gather_630"), val = int32(2)]; int32 select_631 = const()[name = string("select_631"), val = int32(2)]; int32 gather_631_axis_0 = const()[name = string("gather_631_axis_0"), val = int32(0)]; int32 gather_631_batch_dims_0 = const()[name = string("gather_631_batch_dims_0"), val = int32(0)]; bool gather_631_validate_indices_0 = const()[name = string("gather_631_validate_indices_0"), val = bool(false)]; int32 gather_631 = gather(axis = gather_631_axis_0, batch_dims = gather_631_batch_dims_0, indices = select_631, validate_indices = gather_631_validate_indices_0, x = var_6493_shape)[name = string("gather_631")]; int32 gather_632 = const()[name = string("gather_632"), val = int32(128)]; tensor var_6500_axes_0 = const()[name = string("op_6500_axes_0"), val = tensor([2])]; tensor var_6500 = expand_dims(axes = var_6500_axes_0, x = var_6474)[name = string("op_6500")]; tensor shape_702 = shape(x = var_6500)[name = string("shape_702")]; int32 concat_661_axis_0 = const()[name = string("concat_661_axis_0"), val = int32(0)]; bool concat_661_interleave_0 = const()[name = string("concat_661_interleave_0"), val = bool(false)]; tensor concat_661 = concat(axis = concat_661_axis_0, interleave = concat_661_interleave_0, values = (gather_629, gather_630, var_100, gather_631, gather_632))[name = string("concat_661")]; tensor real_div_69 = real_div(x = concat_661, y = shape_702)[name = string("real_div_69")]; tensor hidden_states_1035 = tile(reps = real_div_69, x = var_6500)[name = string("hidden_states_1035")]; tensor concat_662x = const()[name = string("concat_662x"), val = tensor([1, 16, -1, 128])]; tensor value_states_139 = reshape(shape = concat_662x, x = hidden_states_1035)[name = string("value_states_139")]; tensor var_6510_shape = shape(x = key_states_139)[name = string("op_6510_shape")]; int32 select_633 = const()[name = string("select_633"), val = int32(2)]; int32 gather_633_axis_0 = const()[name = string("gather_633_axis_0"), val = int32(0)]; int32 gather_633_batch_dims_0 = const()[name = string("gather_633_batch_dims_0"), val = int32(0)]; bool gather_633_validate_indices_0 = const()[name = string("gather_633_validate_indices_0"), val = bool(false)]; int32 gather_633 = gather(axis = gather_633_axis_0, batch_dims = gather_633_batch_dims_0, indices = select_633, validate_indices = gather_633_validate_indices_0, x = var_6510_shape)[name = string("gather_633")]; int32 concat_663_values0_0 = const()[name = string("concat_663_values0_0"), val = int32(1)]; int32 concat_663_values1_0 = const()[name = string("concat_663_values1_0"), val = int32(1)]; int32 concat_663_values2_0 = const()[name = string("concat_663_values2_0"), val = int32(0)]; int32 concat_663_axis_0 = const()[name = string("concat_663_axis_0"), val = int32(0)]; bool concat_663_interleave_0 = const()[name = string("concat_663_interleave_0"), val = bool(false)]; tensor concat_663 = concat(axis = concat_663_axis_0, interleave = concat_663_interleave_0, values = (concat_663_values0_0, concat_663_values1_0, concat_663_values2_0, gather_633))[name = string("concat_663")]; tensor causal_mask_71_begin_0 = const()[name = string("causal_mask_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_71_end_mask_0 = const()[name = string("causal_mask_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_71 = slice_by_index(begin = causal_mask_71_begin_0, end = concat_663, end_mask = causal_mask_71_end_mask_0, x = cast_0)[name = string("causal_mask_71")]; tensor attn_output_137 = scaled_dot_product_attention(attn_mask = causal_mask_71, key = key_states_139, query = query_states_139, value = value_states_139)[name = string("attn_output_137")]; tensor var_6516_perm_0 = const()[name = string("op_6516_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_664x = const()[name = string("concat_664x"), val = tensor([1, -1, 2048])]; tensor var_6516 = transpose(perm = var_6516_perm_0, x = attn_output_137)[name = string("transpose_4")]; tensor input_273 = reshape(shape = concat_664x, x = var_6516)[name = string("input_273")]; tensor linear_241 = linear(bias = linear_3_bias_0, weight = model_model_layers_34_self_attn_o_proj_weight_quantized, x = input_273)[name = string("linear_241")]; tensor hidden_states_1039 = add(x = hidden_states_1019, y = linear_241)[name = string("hidden_states_1039")]; fp32 var_94_promoted_69 = const()[name = string("op_94_promoted_69"), val = fp32(0x1p+1)]; tensor var_6525 = pow(x = hidden_states_1039, y = var_94_promoted_69)[name = string("op_6525")]; tensor variance_139_axes_0 = const()[name = string("variance_139_axes_0"), val = tensor([-1])]; bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; tensor variance_139 = reduce_mean(axes = variance_139_axes_0, keep_dims = variance_139_keep_dims_0, x = var_6525)[name = string("variance_139")]; fp32 var_6528 = const()[name = string("op_6528"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6529 = add(x = variance_139, y = var_6528)[name = string("op_6529")]; fp32 var_6530_epsilon_0 = const()[name = string("op_6530_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6530 = rsqrt(epsilon = var_6530_epsilon_0, x = var_6529)[name = string("op_6530")]; tensor hidden_states_1043 = mul(x = hidden_states_1039, y = var_6530)[name = string("hidden_states_1043")]; tensor input_275 = mul(x = model_model_layers_34_post_attention_layernorm_weight, y = hidden_states_1043)[name = string("input_275")]; tensor linear_242 = linear(bias = linear_4_bias_0, weight = model_model_layers_34_mlp_gate_proj_weight_quantized, x = input_275)[name = string("linear_242")]; tensor var_6542 = silu(x = linear_242)[name = string("op_6542")]; tensor linear_243 = linear(bias = linear_4_bias_0, weight = model_model_layers_34_mlp_up_proj_weight_quantized, x = input_275)[name = string("linear_243")]; tensor input_279 = mul(x = var_6542, y = linear_243)[name = string("input_279")]; tensor linear_244 = linear(bias = linear_3_bias_0, weight = model_model_layers_34_mlp_down_proj_weight_quantized, x = input_279)[name = string("linear_244")]; tensor hidden_states_1049 = add(x = hidden_states_1039, y = linear_244)[name = string("hidden_states_1049")]; fp32 var_94_promoted_70 = const()[name = string("op_94_promoted_70"), val = fp32(0x1p+1)]; tensor var_6555 = pow(x = hidden_states_1049, y = var_94_promoted_70)[name = string("op_6555")]; tensor variance_141_axes_0 = const()[name = string("variance_141_axes_0"), val = tensor([-1])]; bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; tensor variance_141 = reduce_mean(axes = variance_141_axes_0, keep_dims = variance_141_keep_dims_0, x = var_6555)[name = string("variance_141")]; fp32 var_6558 = const()[name = string("op_6558"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6559 = add(x = variance_141, y = var_6558)[name = string("op_6559")]; fp32 var_6560_epsilon_0 = const()[name = string("op_6560_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6560 = rsqrt(epsilon = var_6560_epsilon_0, x = var_6559)[name = string("op_6560")]; tensor hidden_states_1053 = mul(x = hidden_states_1049, y = var_6560)[name = string("hidden_states_1053")]; tensor hidden_states_1057 = mul(x = model_model_layers_35_input_layernorm_weight, y = hidden_states_1053)[name = string("hidden_states_1057")]; tensor linear_245 = linear(bias = model_model_layers_35_self_attn_q_proj_bias, weight = model_model_layers_35_self_attn_q_proj_weight_quantized, x = hidden_states_1057)[name = string("linear_245")]; tensor linear_246 = linear(bias = model_model_layers_35_self_attn_k_proj_bias, weight = model_model_layers_35_self_attn_k_proj_weight_quantized, x = hidden_states_1057)[name = string("linear_246")]; tensor linear_247 = linear(bias = model_model_layers_35_self_attn_v_proj_bias, weight = model_model_layers_35_self_attn_v_proj_weight_quantized, x = hidden_states_1057)[name = string("linear_247")]; tensor concat_665x = const()[name = string("concat_665x"), val = tensor([1, -1, 16, 128])]; tensor var_6583 = reshape(shape = concat_665x, x = linear_245)[name = string("op_6583")]; tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_666x = const()[name = string("concat_666x"), val = tensor([1, -1, 2, 128])]; tensor var_6586 = reshape(shape = concat_666x, x = linear_246)[name = string("op_6586")]; tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_667x = const()[name = string("concat_667x"), val = tensor([1, -1, 2, 128])]; tensor var_6589 = reshape(shape = concat_667x, x = linear_247)[name = string("op_6589")]; tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q = transpose(perm = q_perm_0, x = var_6583)[name = string("transpose_3")]; tensor var_6593 = mul(x = q, y = cos_7)[name = string("op_6593")]; tensor x1_141_begin_0 = const()[name = string("x1_141_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_141_end_0 = const()[name = string("x1_141_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_141_end_mask_0 = const()[name = string("x1_141_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_141 = slice_by_index(begin = x1_141_begin_0, end = x1_141_end_0, end_mask = x1_141_end_mask_0, x = q)[name = string("x1_141")]; tensor x2_141_begin_0 = const()[name = string("x2_141_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_141_end_0 = const()[name = string("x2_141_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_141_end_mask_0 = const()[name = string("x2_141_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_141 = slice_by_index(begin = x2_141_begin_0, end = x2_141_end_0, end_mask = x2_141_end_mask_0, x = q)[name = string("x2_141")]; fp32 const_73_promoted = const()[name = string("const_73_promoted"), val = fp32(-0x1p+0)]; tensor var_6604 = mul(x = x2_141, y = const_73_promoted)[name = string("op_6604")]; bool var_6606_interleave_0 = const()[name = string("op_6606_interleave_0"), val = bool(false)]; tensor var_6606 = concat(axis = var_88, interleave = var_6606_interleave_0, values = (var_6604, x1_141))[name = string("op_6606")]; tensor var_6607 = mul(x = var_6606, y = sin_7)[name = string("op_6607")]; tensor query_states = add(x = var_6593, y = var_6607)[name = string("query_states")]; tensor k = transpose(perm = k_perm_0, x = var_6586)[name = string("transpose_2")]; tensor var_6609 = mul(x = k, y = cos_7)[name = string("op_6609")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 2, 0, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 2, 0, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k)[name = string("x2")]; fp32 const_74_promoted = const()[name = string("const_74_promoted"), val = fp32(-0x1p+0)]; tensor var_6620 = mul(x = x2, y = const_74_promoted)[name = string("op_6620")]; bool var_6622_interleave_0 = const()[name = string("op_6622_interleave_0"), val = bool(false)]; tensor var_6622 = concat(axis = var_88, interleave = var_6622_interleave_0, values = (var_6620, x1))[name = string("op_6622")]; tensor var_6623 = mul(x = var_6622, y = sin_7)[name = string("op_6623")]; tensor k_state = add(x = var_6609, y = var_6623)[name = string("k_state")]; tensor expand_dims_420 = const()[name = string("expand_dims_420"), val = tensor([0])]; tensor expand_dims_421 = const()[name = string("expand_dims_421"), val = tensor([0])]; tensor expand_dims_423 = const()[name = string("expand_dims_423"), val = tensor([0])]; tensor concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = tensor([35])]; int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)]; bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)]; tensor concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, expand_dims_420, expand_dims_421, expand_dims_2, expand_dims_423))[name = string("concat_670")]; tensor key_cache_internal_tensor_assign_36_stride_0 = const()[name = string("key_cache_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_36_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_36_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_36 = slice_update(begin = concat_670, begin_mask = key_cache_internal_tensor_assign_36_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_36_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_36_squeeze_mask_0, stride = key_cache_internal_tensor_assign_36_stride_0, update = k_state, x = key_cache_internal_tensor_assign_35_1)[name = string("key_cache_internal_tensor_assign_36")]; string cast_503_dtype_0 = const()[name = string("cast_503_dtype_0"), val = string("fp16")]; tensor cast_503 = cast(dtype = cast_503_dtype_0, x = key_cache_internal_tensor_assign_36)[name = string("cast_4")]; write_state(data = cast_503, input = key_cache)[name = string("coreml_update_state_142_write_state")]; tensor coreml_update_state_142 = read_state(input = key_cache)[name = string("coreml_update_state_142")]; string key_cache_internal_tensor_assign_36_dtype_0 = const()[name = string("key_cache_internal_tensor_assign_36_dtype_0"), val = string("fp32")]; tensor value_cache_internal_tensor_assign_36_stride_0 = const()[name = string("value_cache_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_36_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_36_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state = transpose(perm = v_state_perm_0, x = var_6589)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_36 = slice_update(begin = concat_670, begin_mask = value_cache_internal_tensor_assign_36_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_36_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_36_squeeze_mask_0, stride = value_cache_internal_tensor_assign_36_stride_0, update = v_state, x = value_cache_internal_tensor_assign_35_1)[name = string("value_cache_internal_tensor_assign_36")]; string cast_504_dtype_0 = const()[name = string("cast_504_dtype_0"), val = string("fp16")]; tensor cast_504 = cast(dtype = cast_504_dtype_0, x = value_cache_internal_tensor_assign_36)[name = string("cast_3")]; write_state(data = cast_504, input = value_cache)[name = string("coreml_update_state_143_write_state")]; tensor coreml_update_state_143 = read_state(input = value_cache)[name = string("coreml_update_state_143")]; string value_cache_internal_tensor_assign_36_dtype_0 = const()[name = string("value_cache_internal_tensor_assign_36_dtype_0"), val = string("fp32")]; tensor var_6646_begin_0 = const()[name = string("op_6646_begin_0"), val = tensor([35, 0, 0, 0, 0])]; tensor var_6646_end_0 = const()[name = string("op_6646_end_0"), val = tensor([36, 1, 2, 2048, 128])]; tensor var_6646_end_mask_0 = const()[name = string("op_6646_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6646_squeeze_mask_0 = const()[name = string("op_6646_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_36_1 = cast(dtype = key_cache_internal_tensor_assign_36_dtype_0, x = coreml_update_state_142)[name = string("cast_2")]; tensor var_6646 = slice_by_index(begin = var_6646_begin_0, end = var_6646_end_0, end_mask = var_6646_end_mask_0, squeeze_mask = var_6646_squeeze_mask_0, x = key_cache_internal_tensor_assign_36_1)[name = string("op_6646")]; tensor var_6649_begin_0 = const()[name = string("op_6649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6649_end_mask_0 = const()[name = string("op_6649_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6649 = slice_by_index(begin = var_6649_begin_0, end = concat_11, end_mask = var_6649_end_mask_0, x = var_6646)[name = string("op_6649")]; tensor var_6651_begin_0 = const()[name = string("op_6651_begin_0"), val = tensor([35, 0, 0, 0, 0])]; tensor var_6651_end_0 = const()[name = string("op_6651_end_0"), val = tensor([36, 1, 2, 2048, 128])]; tensor var_6651_end_mask_0 = const()[name = string("op_6651_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_6651_squeeze_mask_0 = const()[name = string("op_6651_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor value_cache_internal_tensor_assign_36_1 = cast(dtype = value_cache_internal_tensor_assign_36_dtype_0, x = coreml_update_state_143)[name = string("cast_1")]; tensor var_6651 = slice_by_index(begin = var_6651_begin_0, end = var_6651_end_0, end_mask = var_6651_end_mask_0, squeeze_mask = var_6651_squeeze_mask_0, x = value_cache_internal_tensor_assign_36_1)[name = string("op_6651")]; tensor var_6654_begin_0 = const()[name = string("op_6654_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6654_end_mask_0 = const()[name = string("op_6654_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_6654 = slice_by_index(begin = var_6654_begin_0, end = concat_11, end_mask = var_6654_end_mask_0, x = var_6651)[name = string("op_6654")]; tensor var_6656_shape = shape(x = var_6649)[name = string("op_6656_shape")]; int32 gather_643 = const()[name = string("gather_643"), val = int32(1)]; int32 gather_644 = const()[name = string("gather_644"), val = int32(2)]; int32 select_645 = const()[name = string("select_645"), val = int32(2)]; int32 gather_645_axis_0 = const()[name = string("gather_645_axis_0"), val = int32(0)]; int32 gather_645_batch_dims_0 = const()[name = string("gather_645_batch_dims_0"), val = int32(0)]; bool gather_645_validate_indices_0 = const()[name = string("gather_645_validate_indices_0"), val = bool(false)]; int32 gather_645 = gather(axis = gather_645_axis_0, batch_dims = gather_645_batch_dims_0, indices = select_645, validate_indices = gather_645_validate_indices_0, x = var_6656_shape)[name = string("gather_645")]; int32 gather_646 = const()[name = string("gather_646"), val = int32(128)]; tensor var_6663_axes_0 = const()[name = string("op_6663_axes_0"), val = tensor([2])]; tensor var_6663 = expand_dims(axes = var_6663_axes_0, x = var_6649)[name = string("op_6663")]; tensor shape_717 = shape(x = var_6663)[name = string("shape_717")]; int32 concat_678_axis_0 = const()[name = string("concat_678_axis_0"), val = int32(0)]; bool concat_678_interleave_0 = const()[name = string("concat_678_interleave_0"), val = bool(false)]; tensor concat_678 = concat(axis = concat_678_axis_0, interleave = concat_678_interleave_0, values = (gather_643, gather_644, var_100, gather_645, gather_646))[name = string("concat_678")]; tensor real_div_70 = real_div(x = concat_678, y = shape_717)[name = string("real_div_70")]; tensor hidden_states_1061 = tile(reps = real_div_70, x = var_6663)[name = string("hidden_states_1061")]; tensor concat_679x = const()[name = string("concat_679x"), val = tensor([1, 16, -1, 128])]; tensor key_states = reshape(shape = concat_679x, x = hidden_states_1061)[name = string("key_states")]; tensor var_6673_shape = shape(x = var_6654)[name = string("op_6673_shape")]; int32 gather_647 = const()[name = string("gather_647"), val = int32(1)]; int32 gather_648 = const()[name = string("gather_648"), val = int32(2)]; int32 select_649 = const()[name = string("select_649"), val = int32(2)]; int32 gather_649_axis_0 = const()[name = string("gather_649_axis_0"), val = int32(0)]; int32 gather_649_batch_dims_0 = const()[name = string("gather_649_batch_dims_0"), val = int32(0)]; bool gather_649_validate_indices_0 = const()[name = string("gather_649_validate_indices_0"), val = bool(false)]; int32 gather_649 = gather(axis = gather_649_axis_0, batch_dims = gather_649_batch_dims_0, indices = select_649, validate_indices = gather_649_validate_indices_0, x = var_6673_shape)[name = string("gather_649")]; int32 gather_650 = const()[name = string("gather_650"), val = int32(128)]; tensor var_6680_axes_0 = const()[name = string("op_6680_axes_0"), val = tensor([2])]; tensor var_6680 = expand_dims(axes = var_6680_axes_0, x = var_6654)[name = string("op_6680")]; tensor shape_722 = shape(x = var_6680)[name = string("shape_722")]; int32 concat_680_axis_0 = const()[name = string("concat_680_axis_0"), val = int32(0)]; bool concat_680_interleave_0 = const()[name = string("concat_680_interleave_0"), val = bool(false)]; tensor concat_680 = concat(axis = concat_680_axis_0, interleave = concat_680_interleave_0, values = (gather_647, gather_648, var_100, gather_649, gather_650))[name = string("concat_680")]; tensor real_div_71 = real_div(x = concat_680, y = shape_722)[name = string("real_div_71")]; tensor hidden_states_1065 = tile(reps = real_div_71, x = var_6680)[name = string("hidden_states_1065")]; tensor concat_681x = const()[name = string("concat_681x"), val = tensor([1, 16, -1, 128])]; tensor value_states = reshape(shape = concat_681x, x = hidden_states_1065)[name = string("value_states")]; tensor var_6690_shape = shape(x = key_states)[name = string("op_6690_shape")]; int32 select_651 = const()[name = string("select_651"), val = int32(2)]; int32 gather_651_axis_0 = const()[name = string("gather_651_axis_0"), val = int32(0)]; int32 gather_651_batch_dims_0 = const()[name = string("gather_651_batch_dims_0"), val = int32(0)]; bool gather_651_validate_indices_0 = const()[name = string("gather_651_validate_indices_0"), val = bool(false)]; int32 gather_651 = gather(axis = gather_651_axis_0, batch_dims = gather_651_batch_dims_0, indices = select_651, validate_indices = gather_651_validate_indices_0, x = var_6690_shape)[name = string("gather_651")]; int32 concat_682_values0_0 = const()[name = string("concat_682_values0_0"), val = int32(1)]; int32 concat_682_values1_0 = const()[name = string("concat_682_values1_0"), val = int32(1)]; int32 concat_682_values2_0 = const()[name = string("concat_682_values2_0"), val = int32(0)]; int32 concat_682_axis_0 = const()[name = string("concat_682_axis_0"), val = int32(0)]; bool concat_682_interleave_0 = const()[name = string("concat_682_interleave_0"), val = bool(false)]; tensor concat_682 = concat(axis = concat_682_axis_0, interleave = concat_682_interleave_0, values = (concat_682_values0_0, concat_682_values1_0, concat_682_values2_0, gather_651))[name = string("concat_682")]; tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_1 = slice_by_index(begin = causal_mask_begin_0, end = concat_682, end_mask = causal_mask_end_mask_0, x = cast_0)[name = string("causal_mask")]; tensor attn_output_141 = scaled_dot_product_attention(attn_mask = causal_mask_1, key = key_states, query = query_states, value = value_states)[name = string("attn_output_141")]; tensor var_6696_perm_0 = const()[name = string("op_6696_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_683x = const()[name = string("concat_683x"), val = tensor([1, -1, 2048])]; tensor var_6696 = transpose(perm = var_6696_perm_0, x = attn_output_141)[name = string("transpose_0")]; tensor input_281 = reshape(shape = concat_683x, x = var_6696)[name = string("input_281")]; tensor linear_248 = linear(bias = linear_3_bias_0, weight = model_model_layers_35_self_attn_o_proj_weight_quantized, x = input_281)[name = string("linear_248")]; tensor hidden_states_1069 = add(x = hidden_states_1049, y = linear_248)[name = string("hidden_states_1069")]; fp32 var_94_promoted_71 = const()[name = string("op_94_promoted_71"), val = fp32(0x1p+1)]; tensor var_6705 = pow(x = hidden_states_1069, y = var_94_promoted_71)[name = string("op_6705")]; tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([-1])]; bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; tensor variance_143 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = var_6705)[name = string("variance_143")]; fp32 var_6708 = const()[name = string("op_6708"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6709 = add(x = variance_143, y = var_6708)[name = string("op_6709")]; fp32 var_6710_epsilon_0 = const()[name = string("op_6710_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6710 = rsqrt(epsilon = var_6710_epsilon_0, x = var_6709)[name = string("op_6710")]; tensor hidden_states_1073 = mul(x = hidden_states_1069, y = var_6710)[name = string("hidden_states_1073")]; tensor input_283 = mul(x = model_model_layers_35_post_attention_layernorm_weight, y = hidden_states_1073)[name = string("input_283")]; tensor linear_249 = linear(bias = linear_4_bias_0, weight = model_model_layers_35_mlp_gate_proj_weight_quantized, x = input_283)[name = string("linear_249")]; tensor var_6722 = silu(x = linear_249)[name = string("op_6722")]; tensor linear_250 = linear(bias = linear_4_bias_0, weight = model_model_layers_35_mlp_up_proj_weight_quantized, x = input_283)[name = string("linear_250")]; tensor input_287 = mul(x = var_6722, y = linear_250)[name = string("input_287")]; tensor linear_251 = linear(bias = linear_3_bias_0, weight = model_model_layers_35_mlp_down_proj_weight_quantized, x = input_287)[name = string("linear_251")]; tensor hidden_states_1079 = add(x = hidden_states_1069, y = linear_251)[name = string("hidden_states_1079")]; fp32 var_94_promoted_72 = const()[name = string("op_94_promoted_72"), val = fp32(0x1p+1)]; tensor var_6731 = pow(x = hidden_states_1079, y = var_94_promoted_72)[name = string("op_6731")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_6731)[name = string("variance")]; fp32 var_6734 = const()[name = string("op_6734"), val = fp32(0x1.0c6f7ap-20)]; tensor var_6735 = add(x = variance, y = var_6734)[name = string("op_6735")]; fp32 var_6736_epsilon_0 = const()[name = string("op_6736_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6736 = rsqrt(epsilon = var_6736_epsilon_0, x = var_6735)[name = string("op_6736")]; tensor hidden_states_1083 = mul(x = hidden_states_1079, y = var_6736)[name = string("hidden_states_1083")]; tensor hidden_states = mul(x = model_model_norm_weight, y = hidden_states_1083)[name = string("hidden_states")]; tensor linear_252_bias_0 = const()[name = string("linear_252_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1929624000)))]; tensor logits_type_fp32 = linear(bias = linear_252_bias_0, weight = model_model_embed_tokens_weight_quantized, x = hidden_states)[name = string("linear_252")]; string cast_514_dtype_0 = const()[name = string("cast_514_dtype_0"), val = string("fp16")]; tensor logits = cast(dtype = cast_514_dtype_0, x = logits_type_fp32)[name = string("cast_0")]; } -> (logits); }