Chakita committed on
Commit
2a1b492
·
1 Parent(s): 0e3caba

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +13 -0
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +14 -0
special_tokens_map.json CHANGED
@@ -1,4 +1,17 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "<pad>",
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<BRG>",
4
+ "N_00",
5
+ "N_01",
6
+ "N_02",
7
+ "N_03",
8
+ "N_04",
9
+ "N_05",
10
+ "N_06",
11
+ "N_07",
12
+ "N_08",
13
+ "N_09"
14
+ ],
15
  "bos_token": "<s>",
16
  "eos_token": "</s>",
17
  "pad_token": "<pad>",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c1abd73425d69d27b6933af4fa2a004568434169689b37d1314c6ca3a1d2a7f
3
- size 14500541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa8a458a0b4246af98eeba2630a5817264d60de06571c35bcc5b9927de0d968f
3
+ size 14502533
tokenizer_config.json CHANGED
@@ -1,5 +1,19 @@
1
  {
2
  "add_prefix_space": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "bos_token": "<s>",
4
  "eos_token": "</s>",
5
  "name_or_path": "bigscience/bloom-560m",
 
1
  {
2
  "add_prefix_space": false,
3
+ "add_special_tokens": true,
4
+ "additional_special_tokens": [
5
+ "<BRG>",
6
+ "N_00",
7
+ "N_01",
8
+ "N_02",
9
+ "N_03",
10
+ "N_04",
11
+ "N_05",
12
+ "N_06",
13
+ "N_07",
14
+ "N_08",
15
+ "N_09"
16
+ ],
17
  "bos_token": "<s>",
18
  "eos_token": "</s>",
19
  "name_or_path": "bigscience/bloom-560m",