ikala-ray committed
Commit 587bc22 · 1 Parent(s): d512a31

Update README.md

Files changed (1)
  1. README.md +12 -12
README.md CHANGED
@@ -10,11 +10,11 @@ pipeline_tag: text-generation
 widget:
   - text: >-
       <|prompter|>What is a meme, and what's the history behind this
-      word?<|endoftext|><|assistant|>
-  - text: <|prompter|>What's the Earth total population<|endoftext|><|assistant|>
+      word?</s><|assistant|>
+  - text: <|prompter|>What's the Earth total population</s><|assistant|>
   - text: >-
       <|prompter|>Write a story about future of AI
-      development<|endoftext|><|assistant|>
+      development</s><|assistant|>
 datasets:
   - OpenAssistant/oasst1
   - databricks/databricks-dolly-15k
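This hunk swaps `<|endoftext|>` for `</s>` as the end-of-turn marker in the example prompts: BLOOM tokenizers use `</s>` as their end-of-text token, whereas `<|endoftext|>` belongs to the GPT-NeoX vocabulary used by RedPajama. A minimal sketch of running such a prompt, assuming the fine-tuned checkpoint from this repo is loaded (the base-model id and generation settings below are placeholders, not values from the README):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: in practice you would load the fine-tuned chat checkpoint,
# not the base model named in the training config.
model_id = "ckip-joint/bloom-3b-zh"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# BLOOM's end-of-text token is </s>, hence the widget prompt change above.
prompt = "<|prompter|>What's the Earth total population</s><|assistant|>"

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)  # illustrative length
# Decode only the newly generated tokens after the prompt.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```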
@@ -99,21 +99,21 @@ datasets:
 
 with internal datasets `ikala` so if you try to reproduce please remove the dataset
 
-redpajama-3b:
+bloom-zh-3b:
 ```
-redpajama-3b:
+bloom-zh-3b:
   dtype: fp16
-  log_dir: "redpajama_3b"
-  learning_rate: 1e-5
-  model_name: saved_models/RedPajama-INCITE-Base-3B-v1
-  output_dir: ikala_v4_3b
+  log_dir: "bloom-zh_3b"
+  learning_rate: 8e-6
+  model_name: ckip-joint/bloom-3b-zh
+  output_dir: bloom_model_v4_3b
   weight_decay: 0.0
-  max_length: 8196
+  max_length: 5120
   warmup_steps: 2000
   gradient_checkpointing: true
-  gradient_accumulation_steps: 32
+  gradient_accumulation_steps: 30
   per_device_train_batch_size: 1
-  per_device_eval_batch_size: 2
+  per_device_eval_batch_size: 1
   eval_steps: 500
   save_steps: 1000
   num_train_epochs: 8
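The config block follows the Open-Assistant-style convention of one named YAML section per training run, selected by key. A hypothetical loader sketch (the file path and `load_config` helper are illustrative, not from the README), which also shows the effective batch size implied by the new values:

```python
import yaml

def load_config(path: str, name: str) -> dict:
    """Return the config section registered under `name` (e.g. bloom-zh-3b)."""
    with open(path) as f:
        configs = yaml.safe_load(f)
    return configs[name]

# Illustrative path; the README does not name the config file.
cfg = load_config("configs/config.yaml", "bloom-zh-3b")

# Effective sequences per optimizer step under the new settings:
# 1 (per_device_train_batch_size) * 30 (gradient_accumulation_steps) = 30,
# each up to max_length = 5120 tokens.
effective_batch = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
print(effective_batch, cfg["max_length"])
```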
 