`aux_loss_alpha` should be 1e-4 instead of 1e-3?

#61
by cuichenx - opened
Files changed (1)
  1. config.json +1 -1
config.json CHANGED
@@ -9,7 +9,7 @@
9
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
12
- "aux_loss_alpha": 0.001,
13
  "bos_token_id": 0,
14
  "eos_token_id": 1,
15
  "ep_size": 1,
 
9
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
12
+ "aux_loss_alpha": 0.0001,
13
  "bos_token_id": 0,
14
  "eos_token_id": 1,
15
  "ep_size": 1,