bofenghuang
committed on
Commit
·
9944b75
1
Parent(s):
ce21f0e
up
Browse files
- tokenizer.json +25 -25
- tokenizer.model +2 -2
- tokenizer_config.json +13 -12
tokenizer.json
CHANGED
@@ -50,7 +50,7 @@
|
|
50 |
},
|
51 |
{
|
52 |
"id": 5,
|
53 |
-
"content": "
|
54 |
"single_word": false,
|
55 |
"lstrip": false,
|
56 |
"rstrip": false,
|
@@ -59,7 +59,7 @@
|
|
59 |
},
|
60 |
{
|
61 |
"id": 6,
|
62 |
-
"content": "
|
63 |
"single_word": false,
|
64 |
"lstrip": false,
|
65 |
"rstrip": false,
|
@@ -68,7 +68,7 @@
|
|
68 |
},
|
69 |
{
|
70 |
"id": 7,
|
71 |
-
"content": "
|
72 |
"single_word": false,
|
73 |
"lstrip": false,
|
74 |
"rstrip": false,
|
@@ -77,7 +77,7 @@
|
|
77 |
},
|
78 |
{
|
79 |
"id": 8,
|
80 |
-
"content": "
|
81 |
"single_word": false,
|
82 |
"lstrip": false,
|
83 |
"rstrip": false,
|
@@ -86,7 +86,7 @@
|
|
86 |
},
|
87 |
{
|
88 |
"id": 9,
|
89 |
-
"content": "
|
90 |
"single_word": false,
|
91 |
"lstrip": false,
|
92 |
"rstrip": false,
|
@@ -95,7 +95,7 @@
|
|
95 |
},
|
96 |
{
|
97 |
"id": 10,
|
98 |
-
"content": "
|
99 |
"single_word": false,
|
100 |
"lstrip": false,
|
101 |
"rstrip": false,
|
@@ -104,7 +104,7 @@
|
|
104 |
},
|
105 |
{
|
106 |
"id": 11,
|
107 |
-
"content": "
|
108 |
"single_word": false,
|
109 |
"lstrip": false,
|
110 |
"rstrip": false,
|
@@ -113,7 +113,7 @@
|
|
113 |
},
|
114 |
{
|
115 |
"id": 12,
|
116 |
-
"content": "
|
117 |
"single_word": false,
|
118 |
"lstrip": false,
|
119 |
"rstrip": false,
|
@@ -122,7 +122,7 @@
|
|
122 |
},
|
123 |
{
|
124 |
"id": 13,
|
125 |
-
"content": "
|
126 |
"single_word": false,
|
127 |
"lstrip": false,
|
128 |
"rstrip": false,
|
@@ -131,7 +131,7 @@
|
|
131 |
},
|
132 |
{
|
133 |
"id": 14,
|
134 |
-
"content": "
|
135 |
"single_word": false,
|
136 |
"lstrip": false,
|
137 |
"rstrip": false,
|
@@ -140,7 +140,7 @@
|
|
140 |
},
|
141 |
{
|
142 |
"id": 15,
|
143 |
-
"content": "
|
144 |
"single_word": false,
|
145 |
"lstrip": false,
|
146 |
"rstrip": false,
|
@@ -149,7 +149,7 @@
|
|
149 |
},
|
150 |
{
|
151 |
"id": 16,
|
152 |
-
"content": "
|
153 |
"single_word": false,
|
154 |
"lstrip": false,
|
155 |
"rstrip": false,
|
@@ -7043,18 +7043,18 @@
|
|
7043 |
"</s>": 2,
|
7044 |
"[INST]": 3,
|
7045 |
"[/INST]": 4,
|
7046 |
-
"
|
7047 |
-
"
|
7048 |
-
"
|
7049 |
-
"
|
7050 |
-
"
|
7051 |
-
"
|
7052 |
-
"
|
7053 |
-
"
|
7054 |
-
"
|
7055 |
-
"
|
7056 |
-
"
|
7057 |
-
"
|
7058 |
"[control_15]": 17,
|
7059 |
"[control_16]": 18,
|
7060 |
"[control_17]": 19,
|
@@ -98790,4 +98790,4 @@
|
|
98790 |
"▁ ▁▁▁▁▁▁▁▁▁▁"
|
98791 |
]
|
98792 |
}
|
98793 |
-
}
|
|
|
50 |
},
|
51 |
{
|
52 |
"id": 5,
|
53 |
+
"content": "<tool_call>",
|
54 |
"single_word": false,
|
55 |
"lstrip": false,
|
56 |
"rstrip": false,
|
|
|
59 |
},
|
60 |
{
|
61 |
"id": 6,
|
62 |
+
"content": "<available_tools>",
|
63 |
"single_word": false,
|
64 |
"lstrip": false,
|
65 |
"rstrip": false,
|
|
|
68 |
},
|
69 |
{
|
70 |
"id": 7,
|
71 |
+
"content": "</available_tools>",
|
72 |
"single_word": false,
|
73 |
"lstrip": false,
|
74 |
"rstrip": false,
|
|
|
77 |
},
|
78 |
{
|
79 |
"id": 8,
|
80 |
+
"content": "<tool_response>",
|
81 |
"single_word": false,
|
82 |
"lstrip": false,
|
83 |
"rstrip": false,
|
|
|
86 |
},
|
87 |
{
|
88 |
"id": 9,
|
89 |
+
"content": "</tool_response>",
|
90 |
"single_word": false,
|
91 |
"lstrip": false,
|
92 |
"rstrip": false,
|
|
|
95 |
},
|
96 |
{
|
97 |
"id": 10,
|
98 |
+
"content": "<available_documents>",
|
99 |
"single_word": false,
|
100 |
"lstrip": false,
|
101 |
"rstrip": false,
|
|
|
104 |
},
|
105 |
{
|
106 |
"id": 11,
|
107 |
+
"content": "</available_documents>",
|
108 |
"single_word": false,
|
109 |
"lstrip": false,
|
110 |
"rstrip": false,
|
|
|
113 |
},
|
114 |
{
|
115 |
"id": 12,
|
116 |
+
"content": "<reference>",
|
117 |
"single_word": false,
|
118 |
"lstrip": false,
|
119 |
"rstrip": false,
|
|
|
122 |
},
|
123 |
{
|
124 |
"id": 13,
|
125 |
+
"content": "</reference>",
|
126 |
"single_word": false,
|
127 |
"lstrip": false,
|
128 |
"rstrip": false,
|
|
|
131 |
},
|
132 |
{
|
133 |
"id": 14,
|
134 |
+
"content": "<|system|>",
|
135 |
"single_word": false,
|
136 |
"lstrip": false,
|
137 |
"rstrip": false,
|
|
|
140 |
},
|
141 |
{
|
142 |
"id": 15,
|
143 |
+
"content": "<|user|>",
|
144 |
"single_word": false,
|
145 |
"lstrip": false,
|
146 |
"rstrip": false,
|
|
|
149 |
},
|
150 |
{
|
151 |
"id": 16,
|
152 |
+
"content": "<|assistant|>",
|
153 |
"single_word": false,
|
154 |
"lstrip": false,
|
155 |
"rstrip": false,
|
|
|
7043 |
"</s>": 2,
|
7044 |
"[INST]": 3,
|
7045 |
"[/INST]": 4,
|
7046 |
+
"<tool_call>": 5,
|
7047 |
+
"<available_tools>": 6,
|
7048 |
+
"</available_tools>": 7,
|
7049 |
+
"<tool_response>": 8,
|
7050 |
+
"</tool_response>": 9,
|
7051 |
+
"<available_documents>": 10,
|
7052 |
+
"</available_documents>": 11,
|
7053 |
+
"<reference>": 12,
|
7054 |
+
"</reference>": 13,
|
7055 |
+
"<|system|>": 14,
|
7056 |
+
"<|user|>": 15,
|
7057 |
+
"<|assistant|>": 16,
|
7058 |
"[control_15]": 17,
|
7059 |
"[control_16]": 18,
|
7060 |
"[control_17]": 19,
|
|
|
98790 |
"▁ ▁▁▁▁▁▁▁▁▁▁"
|
98791 |
]
|
98792 |
}
|
98793 |
+
}
|
tokenizer.model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c6f8a481c33393f0799a3fbb8b4905efda071b90a3c852b134ef4959890190a
|
3 |
+
size 587420
|
tokenizer_config.json
CHANGED
@@ -44,7 +44,7 @@
|
|
44 |
"special": true
|
45 |
},
|
46 |
"5": {
|
47 |
-
"content": "
|
48 |
"lstrip": false,
|
49 |
"normalized": false,
|
50 |
"rstrip": false,
|
@@ -52,7 +52,7 @@
|
|
52 |
"special": true
|
53 |
},
|
54 |
"6": {
|
55 |
-
"content": "
|
56 |
"lstrip": false,
|
57 |
"normalized": false,
|
58 |
"rstrip": false,
|
@@ -60,7 +60,7 @@
|
|
60 |
"special": true
|
61 |
},
|
62 |
"7": {
|
63 |
-
"content": "
|
64 |
"lstrip": false,
|
65 |
"normalized": false,
|
66 |
"rstrip": false,
|
@@ -68,7 +68,7 @@
|
|
68 |
"special": true
|
69 |
},
|
70 |
"8": {
|
71 |
-
"content": "
|
72 |
"lstrip": false,
|
73 |
"normalized": false,
|
74 |
"rstrip": false,
|
@@ -76,7 +76,7 @@
|
|
76 |
"special": true
|
77 |
},
|
78 |
"9": {
|
79 |
-
"content": "
|
80 |
"lstrip": false,
|
81 |
"normalized": false,
|
82 |
"rstrip": false,
|
@@ -84,7 +84,7 @@
|
|
84 |
"special": true
|
85 |
},
|
86 |
"10": {
|
87 |
-
"content": "
|
88 |
"lstrip": false,
|
89 |
"normalized": false,
|
90 |
"rstrip": false,
|
@@ -92,7 +92,7 @@
|
|
92 |
"special": true
|
93 |
},
|
94 |
"11": {
|
95 |
-
"content": "
|
96 |
"lstrip": false,
|
97 |
"normalized": false,
|
98 |
"rstrip": false,
|
@@ -100,7 +100,7 @@
|
|
100 |
"special": true
|
101 |
},
|
102 |
"12": {
|
103 |
-
"content": "
|
104 |
"lstrip": false,
|
105 |
"normalized": false,
|
106 |
"rstrip": false,
|
@@ -108,7 +108,7 @@
|
|
108 |
"special": true
|
109 |
},
|
110 |
"13": {
|
111 |
-
"content": "
|
112 |
"lstrip": false,
|
113 |
"normalized": false,
|
114 |
"rstrip": false,
|
@@ -116,7 +116,7 @@
|
|
116 |
"special": true
|
117 |
},
|
118 |
"14": {
|
119 |
-
"content": "
|
120 |
"lstrip": false,
|
121 |
"normalized": false,
|
122 |
"rstrip": false,
|
@@ -124,7 +124,7 @@
|
|
124 |
"special": true
|
125 |
},
|
126 |
"15": {
|
127 |
-
"content": "
|
128 |
"lstrip": false,
|
129 |
"normalized": false,
|
130 |
"rstrip": false,
|
@@ -132,7 +132,7 @@
|
|
132 |
"special": true
|
133 |
},
|
134 |
"16": {
|
135 |
-
"content": "
|
136 |
"lstrip": false,
|
137 |
"normalized": false,
|
138 |
"rstrip": false,
|
@@ -6173,6 +6173,7 @@
|
|
6173 |
}
|
6174 |
},
|
6175 |
"bos_token": "<s>",
|
|
|
6176 |
"clean_up_tokenization_spaces": false,
|
6177 |
"eos_token": "</s>",
|
6178 |
"legacy": false,
|
|
|
44 |
"special": true
|
45 |
},
|
46 |
"5": {
|
47 |
+
"content": "<tool_call>",
|
48 |
"lstrip": false,
|
49 |
"normalized": false,
|
50 |
"rstrip": false,
|
|
|
52 |
"special": true
|
53 |
},
|
54 |
"6": {
|
55 |
+
"content": "<available_tools>",
|
56 |
"lstrip": false,
|
57 |
"normalized": false,
|
58 |
"rstrip": false,
|
|
|
60 |
"special": true
|
61 |
},
|
62 |
"7": {
|
63 |
+
"content": "</available_tools>",
|
64 |
"lstrip": false,
|
65 |
"normalized": false,
|
66 |
"rstrip": false,
|
|
|
68 |
"special": true
|
69 |
},
|
70 |
"8": {
|
71 |
+
"content": "<tool_response>",
|
72 |
"lstrip": false,
|
73 |
"normalized": false,
|
74 |
"rstrip": false,
|
|
|
76 |
"special": true
|
77 |
},
|
78 |
"9": {
|
79 |
+
"content": "</tool_response>",
|
80 |
"lstrip": false,
|
81 |
"normalized": false,
|
82 |
"rstrip": false,
|
|
|
84 |
"special": true
|
85 |
},
|
86 |
"10": {
|
87 |
+
"content": "<available_documents>",
|
88 |
"lstrip": false,
|
89 |
"normalized": false,
|
90 |
"rstrip": false,
|
|
|
92 |
"special": true
|
93 |
},
|
94 |
"11": {
|
95 |
+
"content": "</available_documents>",
|
96 |
"lstrip": false,
|
97 |
"normalized": false,
|
98 |
"rstrip": false,
|
|
|
100 |
"special": true
|
101 |
},
|
102 |
"12": {
|
103 |
+
"content": "<reference>",
|
104 |
"lstrip": false,
|
105 |
"normalized": false,
|
106 |
"rstrip": false,
|
|
|
108 |
"special": true
|
109 |
},
|
110 |
"13": {
|
111 |
+
"content": "</reference>",
|
112 |
"lstrip": false,
|
113 |
"normalized": false,
|
114 |
"rstrip": false,
|
|
|
116 |
"special": true
|
117 |
},
|
118 |
"14": {
|
119 |
+
"content": "<|system|>",
|
120 |
"lstrip": false,
|
121 |
"normalized": false,
|
122 |
"rstrip": false,
|
|
|
124 |
"special": true
|
125 |
},
|
126 |
"15": {
|
127 |
+
"content": "<|user|>",
|
128 |
"lstrip": false,
|
129 |
"normalized": false,
|
130 |
"rstrip": false,
|
|
|
132 |
"special": true
|
133 |
},
|
134 |
"16": {
|
135 |
+
"content": "<|assistant|>",
|
136 |
"lstrip": false,
|
137 |
"normalized": false,
|
138 |
"rstrip": false,
|
|
|
6173 |
}
|
6174 |
},
|
6175 |
"bos_token": "<s>",
|
6176 |
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
6177 |
"clean_up_tokenization_spaces": false,
|
6178 |
"eos_token": "</s>",
|
6179 |
"legacy": false,
|