NovaBlend committed on
Commit
6640232
·
verified ·
1 Parent(s): 1dee2d4

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +13 -13
training_config.yml CHANGED
@@ -5,8 +5,8 @@ model:
5
  - v_proj
6
  apply_lora_to_mlp: false
7
  apply_lora_to_output: false
8
- lora_rank: 16
9
- lora_alpha: 32
10
  perception_tokens: 2
11
  use_clip: false
12
  tokenizer:
@@ -14,22 +14,22 @@ tokenizer:
14
  path: models/tokenizer.model
15
  checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
- checkpoint_dir: /workspace/omega_a2a/training
18
  checkpoint_files:
19
  - consolidated.00.pth
20
  adapter_checkpoint: null
21
  recipe_checkpoint: null
22
- output_dir: /workspace/omega_a2a/checkpoints
23
  model_type: LLAMA3
24
  resume_from_checkpoint: false
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
- max_new_tokens: 170
28
  temperature: 0.8
29
  top_k: 200
30
  dataset:
31
  _component_: ds.EvenBatcher
32
- buffer_size: 36
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
@@ -46,19 +46,19 @@ dataset:
46
  train_on_input: false
47
  seed: null
48
  shuffle: true
49
- batch_size: 4
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
- weight_decay: 0.0001
53
- lr: 3.0e-05
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
- num_warmup_steps: 100
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
  epochs: 6
60
  max_steps_per_epoch: null
61
- gradient_accumulation_steps: 64
62
  compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger:
@@ -67,7 +67,7 @@ metric_logger:
67
  log_every_n_steps: null
68
  device: cuda
69
  dtype: bf16
70
- enable_activation_checkpointing: false
71
  profiler:
72
  _component_: torchtune.utils.profiler
73
  enabled: false
@@ -77,7 +77,7 @@ inference:
77
  {video}
78
 
79
  Caption the previous video.'
80
- max_new_tokens: 170
81
  temperature: 0.6
82
  top_k: 200
83
  quantizer: null
 
5
  - v_proj
6
  apply_lora_to_mlp: false
7
  apply_lora_to_output: false
8
+ lora_rank: 32
9
+ lora_alpha: 64
10
  perception_tokens: 2
11
  use_clip: false
12
  tokenizer:
 
14
  path: models/tokenizer.model
15
  checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
+ checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
18
  checkpoint_files:
19
  - consolidated.00.pth
20
  adapter_checkpoint: null
21
  recipe_checkpoint: null
22
+ output_dir: output_checkpoints/experiment_1
23
  model_type: LLAMA3
24
  resume_from_checkpoint: false
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
+ max_new_tokens: 200
28
  temperature: 0.8
29
  top_k: 200
30
  dataset:
31
  _component_: ds.EvenBatcher
32
+ buffer_size: 72
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
 
46
  train_on_input: false
47
  seed: null
48
  shuffle: true
49
+ batch_size: 16
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
+ weight_decay: 0.001
53
+ lr: 0.0003
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
+ num_warmup_steps: 150
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
  epochs: 6
60
  max_steps_per_epoch: null
61
+ gradient_accumulation_steps: 16
62
  compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger:
 
67
  log_every_n_steps: null
68
  device: cuda
69
  dtype: bf16
70
+ enable_activation_checkpointing: true
71
  profiler:
72
  _component_: torchtune.utils.profiler
73
  enabled: false
 
77
  {video}
78
 
79
  Caption the previous video.'
80
+ max_new_tokens: 300
81
  temperature: 0.6
82
  top_k: 200
83
  quantizer: null