suriya7 committed
Commit 0e24b78 · verified · 1 Parent(s): cdfd62b

Upload tokenizer

Files changed (4)
  1. README.md +4 -4
  2. special_tokens_map.json +9 -15
  3. tokenizer_config.json +1 -1
  4. vocab.json +0 -0
README.md CHANGED
@@ -1,15 +1,15 @@
  ---
+ base_model:
+ - MBZUAI/LaMini-GPT-774M
  library_name: transformers
+ license: apache-2.0
+ model_name: ChatGPT-2.V2
  tags:
  - conversational-ai
  - fine-tuning
  - gpt2
  - causal-lm
  - chatbots
- license: apache-2.0
- model_name: ChatGPT-2.V2
- base_model:
- - MBZUAI/LaMini-GPT-774M
  ---

  # ChatGPT-2.V2 Model Card
special_tokens_map.json CHANGED
@@ -1,19 +1,7 @@
  {
    "additional_special_tokens": [
-     {
-       "content": "<|im_start|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false
-     },
-     {
-       "content": "<|im_end|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false
-     }
+     "<|im_start|>",
+     "<|im_end|>"
    ],
    "bos_token": {
      "content": "<|endoftext|>",
@@ -29,7 +17,13 @@
      "rstrip": false,
      "single_word": false
    },
-   "pad_token": "<|endoftext|>",
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
    "unk_token": {
      "content": "<|endoftext|>",
      "lstrip": false,
tokenizer_config.json CHANGED
@@ -47,6 +47,6 @@
    "model_max_length": 512,
    "pad_token": "<|endoftext|>",
    "padding_side": "right",
-   "tokenizer_class": "GPT2Tokenizer",
+   "tokenizer_class": "CustomGPT2Tokenizer",
    "unk_token": "<|endoftext|>"
  }
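Because tokenizer_class now names CustomGPT2Tokenizer, AutoTokenizer may not resolve it unless a class with that name is importable. The sketch below, again using a placeholder local path, loads the stock GPT2Tokenizer directly (which should only log a class-mismatch warning) and exercises the padding settings written by this config:

```python
from transformers import GPT2Tokenizer

# Placeholder path: a local checkout containing the files from this commit.
tok = GPT2Tokenizer.from_pretrained("./ChatGPT-2.V2")

# Values written by this commit's tokenizer_config.json.
print(tok.model_max_length)  # 512
print(tok.padding_side)      # right
print(tok.pad_token)         # <|endoftext|>

# Right-pad a small batch up to the configured maximum length.
batch = tok(
    ["<|im_start|>user\nHi<|im_end|>",
     "<|im_start|>user\nA somewhat longer prompt<|im_end|>"],
    padding="max_length",
    truncation=True,
    max_length=tok.model_max_length,
)
print(len(batch["input_ids"][0]))      # 512
print(batch["attention_mask"][0][-1])  # 0 -> trailing positions are padding
```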
vocab.json CHANGED
The diff for this file is too large to render. See raw diff