Set triton kernels as default. Remove deprecated config fields.

#5
by maxmbeck - opened
Files changed (1) hide show
  1. config.json +12 -5
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/nfs-gpu/xlstm/converted_model_checkpoints/dclm_mLSTMv1_7B_ctx8192_sep_finetune_2024-11-29T17:03:51_0_550000",
3
  "add_embedding_dropout": false,
4
  "add_forward_backend_padding": false,
5
  "add_out_norm": true,
@@ -9,18 +9,23 @@
9
  "architectures": [
10
  "xLSTMForCausalLM"
11
  ],
 
12
  "bos_token_id": 0,
13
  "cell_norm_eps": 1e-06,
 
 
14
  "embedding_dim": 4096,
15
  "eos_token_id": 2,
 
16
  "ffn_proj_factor": 2.667,
17
  "ffn_round_up_to_multiple_of": 64,
18
  "force_bos_token_insert": true,
19
- "forward_backend_name": "chunkwise--triton_limit_chunk",
20
  "gate_soft_cap": 15.0,
21
  "head_dim": 512,
22
  "igate_bias_init_range": -10.0,
 
23
  "mlstm_round_up_to_multiple_of": 64,
 
24
  "model_type": "xlstm",
25
  "norm_eps": 1e-06,
26
  "norm_reduction_force_float32": true,
@@ -30,12 +35,14 @@
30
  "pad_token_id": 1,
31
  "qk_dim_factor": 0.5,
32
  "return_last_states": true,
33
- "step_backend_name": "triton_fused",
 
34
  "tie_word_embeddings": false,
35
  "torch_dtype": "float32",
36
- "transformers_version": "4.47.0.dev0",
37
  "use_bias": false,
38
  "use_cache": true,
39
  "v_dim_factor": 1.0,
40
- "vocab_size": 50304
 
41
  }
 
1
  {
2
+ "_name_or_path": "NX-AI/xLSTM-7b",
3
  "add_embedding_dropout": false,
4
  "add_forward_backend_padding": false,
5
  "add_out_norm": true,
 
9
  "architectures": [
10
  "xLSTMForCausalLM"
11
  ],
12
+ "autocast_kernel_dtype": "bfloat16",
13
  "bos_token_id": 0,
14
  "cell_norm_eps": 1e-06,
15
+ "chunk_size": 64,
16
+ "chunkwise_kernel": "chunkwise--triton_xl_chunk",
17
  "embedding_dim": 4096,
18
  "eos_token_id": 2,
19
+ "eps": 1e-06,
20
  "ffn_proj_factor": 2.667,
21
  "ffn_round_up_to_multiple_of": 64,
22
  "force_bos_token_insert": true,
 
23
  "gate_soft_cap": 15.0,
24
  "head_dim": 512,
25
  "igate_bias_init_range": -10.0,
26
+ "inference_state_dtype": "float32",
27
  "mlstm_round_up_to_multiple_of": 64,
28
+ "mode": "inference",
29
  "model_type": "xlstm",
30
  "norm_eps": 1e-06,
31
  "norm_reduction_force_float32": true,
 
35
  "pad_token_id": 1,
36
  "qk_dim_factor": 0.5,
37
  "return_last_states": true,
38
+ "sequence_kernel": "native_sequence__triton",
39
+ "step_kernel": "triton",
40
  "tie_word_embeddings": false,
41
  "torch_dtype": "float32",
42
+ "transformers_version": "4.48.0.dev0",
43
  "use_bias": false,
44
  "use_cache": true,
45
  "v_dim_factor": 1.0,
46
+ "vocab_size": 50304,
47
+ "weight_mode": "single"
48
  }