Tippawan committed
Commit 83bc9a6 · verified
1 Parent(s): 983beac

End of training

Files changed (2)
  1. README.md +8 -8
  2. adapter_model.bin +1 -1
README.md CHANGED
```diff
@@ -6,7 +6,7 @@ tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name: proof-reading-SeaLLM3-7B-Chat-3090-v6
+- name: proof-reading-SeaLLM3-7B-Chat-3090-v7
   results: []
 ---
 
@@ -26,7 +26,7 @@ load_in_4bit: true
 strict: false
 
 datasets:
-  - path: Tippawan/pr-6-seallm-messages-only
+  - path: Tippawan/pr-7-seallm
     type: sharegpt
     conversation: chatml
     field_messages: messages
@@ -41,7 +41,7 @@ eval_sample_packing: false
 pad_to_sequence_len: false
 
 push_to_hub: true
-hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v6 # Replace with your Hugging Face repo ID
+hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v7 # Replace with your Hugging Face repo ID
 use_auth_token: true # Ensure you have set your Hugging Face API token in the environment
 hub_private_repo: true # Set to true if you want the repository to be private
 hub_strategy: all_checkpoints
@@ -49,14 +49,14 @@ save_total_limit: 3
 load_best_model_at_end: true
 
 adapter: lora
-lora_model_dir: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v5
+lora_model_dir: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v6
 lora_r: 16
 lora_alpha: 32
 lora_dropout: 0.05
 lora_target_linear: true
 lora_fan_in_fan_out:
 
-wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v6
+wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v7
 wandb_entity:
 wandb_watch:
 wandb_name:
@@ -64,7 +64,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 4
 micro_batch_size: 8
-num_epochs: 10 #editted 3
+num_epochs: 1 #editted 3
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.0002
@@ -96,7 +96,7 @@ special_tokens:
 
 </details><br>
 
-# proof-reading-SeaLLM3-7B-Chat-3090-v6
+# proof-reading-SeaLLM3-7B-Chat-3090-v7
 
 This model is a fine-tuned version of [SeaLLMs/SeaLLM3-7B-Chat](https://huggingface.co/SeaLLMs/SeaLLM3-7B-Chat) on the None dataset.
 
@@ -126,7 +126,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 10
+- num_epochs: 1
 
 ### Training results
 
```
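For reference, a minimal sketch of loading the updated adapter on top of the base model with `transformers` and `peft`. The repo IDs (`SeaLLMs/SeaLLM3-7B-Chat`, `Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v7`) and the chatml message format come from the config above; everything else here (dtype, prompt, generation settings) is an assumption, and since `hub_private_repo` is true an authorized Hugging Face token would be needed.

```python
# Minimal sketch (not from the commit): attach the v7 LoRA adapter to the base model.
# Assumes a valid Hugging Face token is available, since the adapter repo is private.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "SeaLLMs/SeaLLM3-7B-Chat"
adapter_id = "Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v7"  # hub_model_id from the config

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # loads adapter_model.bin on top of the base weights

# The training data was chatml-formatted conversations, so use the chat template.
messages = [{"role": "user", "content": "Proofread this sentence."}]  # hypothetical prompt
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```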
adapter_model.bin CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b34f0b8b3c4464ace9eb97017538ef0a0d11f2df2637924714bd32bd7b5994bc
+oid sha256:148fcbc3147964e77cd19ea6bcf16d29a91db5a9938b555fcc453b35032ca6d5
 size 161621802
```
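The `adapter_model.bin` change is only the Git LFS pointer being updated to point at the new adapter weights; the pointer records the file's sha256 and byte size. As an illustrative check (not part of the repo), a downloaded copy can be verified against those two fields:

```python
# Illustrative only: verify a downloaded adapter_model.bin against the LFS pointer fields above.
import hashlib
import os

expected_oid = "148fcbc3147964e77cd19ea6bcf16d29a91db5a9938b555fcc453b35032ca6d5"  # new pointer oid
expected_size = 161621802  # bytes, from the pointer

path = "adapter_model.bin"  # local path to the downloaded file (assumption)

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```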