ikala-ray committed
Commit 587bc22 · 1 Parent(s): d512a31

Update README.md

Files changed (1)
  1. README.md +12 -12
README.md CHANGED
@@ -10,11 +10,11 @@ pipeline_tag: text-generation
 widget:
   - text: >-
       <|prompter|>What is a meme, and what's the history behind this
-      word?<|endoftext|><|assistant|>
-  - text: <|prompter|>What's the Earth total population<|endoftext|><|assistant|>
+      word?</s><|assistant|>
+  - text: <|prompter|>What's the Earth total population</s><|assistant|>
   - text: >-
       <|prompter|>Write a story about future of AI
-      development<|endoftext|><|assistant|>
+      development</s><|assistant|>
 datasets:
   - OpenAssistant/oasst1
   - databricks/databricks-dolly-15k
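This hunk swaps `<|endoftext|>` for `</s>` as the end-of-turn marker in the example prompts: BLOOM tokenizers use `</s>` as their end-of-text token, whereas `<|endoftext|>` belongs to the GPT-NeoX vocabulary used by RedPajama. A minimal sketch of running such a prompt, assuming the fine-tuned checkpoint from this repo is loaded (the base-model id and generation settings below are placeholders, not values from the README):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: in practice you would load the fine-tuned chat checkpoint,
# not the base model named in the training config.
model_id = "ckip-joint/bloom-3b-zh"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# BLOOM's end-of-text token is </s>, hence the widget prompt change above.
prompt = "<|prompter|>What's the Earth total population</s><|assistant|>"

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)  # illustrative length
# Decode only the newly generated tokens after the prompt.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```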
@@ -99,21 +99,21 @@ datasets:
 
 with internal datasets `ikala` so if you try to reproduce please remove the dataset
 
-redpajama-3b:
+bloom-zh-3b:
 ```
-redpajama-3b:
+bloom-zh-3b:
   dtype: fp16
-  log_dir: "redpajama_3b"
-  learning_rate: 1e-5
-  model_name: saved_models/RedPajama-INCITE-Base-3B-v1
-  output_dir: ikala_v4_3b
+  log_dir: "bloom-zh_3b"
+  learning_rate: 8e-6
+  model_name: ckip-joint/bloom-3b-zh
+  output_dir: bloom_model_v4_3b
   weight_decay: 0.0
-  max_length: 8196
+  max_length: 5120
   warmup_steps: 2000
   gradient_checkpointing: true
-  gradient_accumulation_steps: 32
+  gradient_accumulation_steps: 30
   per_device_train_batch_size: 1
-  per_device_eval_batch_size: 2
+  per_device_eval_batch_size: 1
   eval_steps: 500
   save_steps: 1000
   num_train_epochs: 8
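The config block follows the Open-Assistant-style convention of one named YAML section per training run, selected by key. A hypothetical loader sketch (the file path and `load_config` helper are illustrative, not from the README), which also shows the effective batch size implied by the new values:

```python
import yaml

def load_config(path: str, name: str) -> dict:
    """Return the config section registered under `name` (e.g. bloom-zh-3b)."""
    with open(path) as f:
        configs = yaml.safe_load(f)
    return configs[name]

# Illustrative path; the README does not name the config file.
cfg = load_config("configs/config.yaml", "bloom-zh-3b")

# Effective sequences per optimizer step under the new settings:
# 1 (per_device_train_batch_size) * 30 (gradient_accumulation_steps) = 30,
# each up to max_length = 5120 tokens.
effective_batch = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
print(effective_batch, cfg["max_length"])
```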
 