winglian committed
Commit 4818380 · Parent: 247825b

update stablelm config

Files changed (1):
  1. configs/stability_3b.yml +30 -7
configs/stability_3b.yml CHANGED

@@ -1,5 +1,6 @@
 base_model: stabilityai/stablelm-base-alpha-3b
-load_in_8bit: true
+base_model_config: stabilityai/stablelm-base-alpha-3b
+load_in_8bit: false
 datasets:
   - path: vicgalle/alpaca-gpt4
     type: alpaca
@@ -8,6 +9,7 @@ val_set_size: 0.04
 adapter:
 lora_model_dir:
 sequence_len: 4096
+max_packed_sequence_len: 4096
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
@@ -15,19 +17,40 @@ lora_target_modules:
   - q_proj
   - v_proj
 lora_fan_in_fan_out: false
-wandb_project: stable-llama-3b
+wandb_project: stable-alpaca-3b
 wandb_watch:
 wandb_run_id:
 wandb_log_model: checkpoint
-output_dir: ./stable-llama-3b
-batch_size: 128
-micro_batch_size: 16
+output_dir: ./stable-alpaca-3b
+batch_size: 2
+micro_batch_size: 1
 num_epochs: 1
-learning_rate: 0.00003
+optimizer: adamw_bnb_8bit
+torchdistx_path:
+lr_scheduler: cosine
+learning_rate: 0.0000002
 train_on_inputs: false
 group_by_length: false
 bf16: true
 tf32: true
-early_stopping_patience: 3
+early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 100
+eval_steps: 50
+save_steps: 200
+debug:
+deepspeed:
+weight_decay: 0.01
+fsdp:
+fsdp_config:
+#special_tokens:
+# pad_token: "[PAD]"
+# bos_token: "<s>"
+# eos_token: "</s>"
+# unk_token: "<unk>"
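
A few notes on what the changed values amount to in practice. With load_in_8bit flipped to false while bf16 and tf32 stay on, the base model is now loaded as plain bfloat16 weights rather than int8-quantized ones. A minimal sketch of what those three flags translate to, assuming a vanilla transformers load rather than this project's actual loader:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # tf32: true -> allow TF32 tensor-core matmuls on Ampere+ GPUs
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # load_in_8bit: false, bf16: true -> full bfloat16 weights, no quantization
    model = AutoModelForCausalLM.from_pretrained(
        "stabilityai/stablelm-base-alpha-3b",
        torch_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-base-alpha-3b")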
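
The new max_packed_sequence_len: 4096 caps how many tokens may be packed into a single training sequence. A toy illustration of greedy sample packing under that cap, not the project's actual packing code:

    def pack_examples(token_lists, max_len=4096):
        # Greedily concatenate tokenized examples so that no packed
        # sequence exceeds max_len; oversize examples are truncated.
        packed, current = [], []
        for toks in token_lists:
            toks = toks[:max_len]
            if len(current) + len(toks) > max_len:
                packed.append(current)
                current = []
            current = current + toks
        if current:
            packed.append(current)
        return packed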
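
batch_size drops from 128 to 2 and micro_batch_size from 16 to 1. In configs of this style the two values together imply the number of gradient-accumulation steps; a sketch of the usual derivation (the helper is hypothetical, not the project's code):

    def grad_accum_steps(batch_size: int, micro_batch_size: int) -> int:
        # Effective batch = micro batch per forward pass x accumulation steps.
        assert batch_size % micro_batch_size == 0
        return batch_size // micro_batch_size

    print(grad_accum_steps(128, 16))  # old config -> 8
    print(grad_accum_steps(2, 1))     # new config -> 2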
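
The optimizer and schedule are now explicit: adamw_bnb_8bit with a cosine schedule, a 100-step warmup, weight decay 0.01, and a much smaller learning rate of 2e-7. Assuming adamw_bnb_8bit resolves to bitsandbytes' AdamW8bit (as it does in Hugging Face's Trainer), the pairing looks roughly like the sketch below; total_steps is a placeholder that would really be derived from the dataset size, batch size, and num_epochs:

    import bitsandbytes as bnb
    from transformers import get_cosine_schedule_with_warmup

    total_steps = 1000  # placeholder; computed at runtime in practice

    optimizer = bnb.optim.AdamW8bit(
        model.parameters(),   # `model` as loaded in the earlier sketch
        lr=2e-7,              # learning_rate: 0.0000002
        weight_decay=0.01,    # weight_decay: 0.01
    )
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=100,            # warmup_steps: 100
        num_training_steps=total_steps,
    )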