jondurbin commited on
Commit
fcf758c
·
verified ·
1 Parent(s): 2d36f42

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|im_end|>": 32001,
3
+ "<|im_start|>": 32000,
4
+ "<|special_0|>": 32002,
5
+ "<|special_10|>": 32012,
6
+ "<|special_11|>": 32013,
7
+ "<|special_12|>": 32014,
8
+ "<|special_13|>": 32015,
9
+ "<|special_14|>": 32016,
10
+ "<|special_15|>": 32017,
11
+ "<|special_16|>": 32018,
12
+ "<|special_17|>": 32019,
13
+ "<|special_18|>": 32020,
14
+ "<|special_19|>": 32021,
15
+ "<|special_1|>": 32003,
16
+ "<|special_20|>": 32022,
17
+ "<|special_21|>": 32023,
18
+ "<|special_22|>": 32024,
19
+ "<|special_23|>": 32025,
20
+ "<|special_24|>": 32026,
21
+ "<|special_25|>": 32027,
22
+ "<|special_26|>": 32028,
23
+ "<|special_27|>": 32029,
24
+ "<|special_28|>": 32030,
25
+ "<|special_29|>": 32031,
26
+ "<|special_2|>": 32004,
27
+ "<|special_30|>": 32032,
28
+ "<|special_31|>": 32033,
29
+ "<|special_32|>": 32034,
30
+ "<|special_33|>": 32035,
31
+ "<|special_34|>": 32036,
32
+ "<|special_35|>": 32037,
33
+ "<|special_36|>": 32038,
34
+ "<|special_37|>": 32039,
35
+ "<|special_38|>": 32040,
36
+ "<|special_39|>": 32041,
37
+ "<|special_3|>": 32005,
38
+ "<|special_40|>": 32042,
39
+ "<|special_41|>": 32043,
40
+ "<|special_42|>": 32044,
41
+ "<|special_43|>": 32045,
42
+ "<|special_44|>": 32046,
43
+ "<|special_45|>": 32047,
44
+ "<|special_46|>": 32048,
45
+ "<|special_47|>": 32049,
46
+ "<|special_48|>": 32050,
47
+ "<|special_49|>": 32051,
48
+ "<|special_4|>": 32006,
49
+ "<|special_50|>": 32052,
50
+ "<|special_51|>": 32053,
51
+ "<|special_52|>": 32054,
52
+ "<|special_53|>": 32055,
53
+ "<|special_54|>": 32056,
54
+ "<|special_55|>": 32057,
55
+ "<|special_56|>": 32058,
56
+ "<|special_57|>": 32059,
57
+ "<|special_58|>": 32060,
58
+ "<|special_59|>": 32061,
59
+ "<|special_5|>": 32007,
60
+ "<|special_60|>": 32062,
61
+ "<|special_61|>": 32063,
62
+ "<|special_6|>": 32008,
63
+ "<|special_7|>": 32009,
64
+ "<|special_8|>": 32010,
65
+ "<|special_9|>": 32011
66
+ }
special_tokens_map.json CHANGED
@@ -1,4 +1,70 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|special_0|>",
6
+ "<|special_1|>",
7
+ "<|special_2|>",
8
+ "<|special_3|>",
9
+ "<|special_4|>",
10
+ "<|special_5|>",
11
+ "<|special_6|>",
12
+ "<|special_7|>",
13
+ "<|special_8|>",
14
+ "<|special_9|>",
15
+ "<|special_10|>",
16
+ "<|special_11|>",
17
+ "<|special_12|>",
18
+ "<|special_13|>",
19
+ "<|special_14|>",
20
+ "<|special_15|>",
21
+ "<|special_16|>",
22
+ "<|special_17|>",
23
+ "<|special_18|>",
24
+ "<|special_19|>",
25
+ "<|special_20|>",
26
+ "<|special_21|>",
27
+ "<|special_22|>",
28
+ "<|special_23|>",
29
+ "<|special_24|>",
30
+ "<|special_25|>",
31
+ "<|special_26|>",
32
+ "<|special_27|>",
33
+ "<|special_28|>",
34
+ "<|special_29|>",
35
+ "<|special_30|>",
36
+ "<|special_31|>",
37
+ "<|special_32|>",
38
+ "<|special_33|>",
39
+ "<|special_34|>",
40
+ "<|special_35|>",
41
+ "<|special_36|>",
42
+ "<|special_37|>",
43
+ "<|special_38|>",
44
+ "<|special_39|>",
45
+ "<|special_40|>",
46
+ "<|special_41|>",
47
+ "<|special_42|>",
48
+ "<|special_43|>",
49
+ "<|special_44|>",
50
+ "<|special_45|>",
51
+ "<|special_46|>",
52
+ "<|special_47|>",
53
+ "<|special_48|>",
54
+ "<|special_49|>",
55
+ "<|special_50|>",
56
+ "<|special_51|>",
57
+ "<|special_52|>",
58
+ "<|special_53|>",
59
+ "<|special_54|>",
60
+ "<|special_55|>",
61
+ "<|special_56|>",
62
+ "<|special_57|>",
63
+ "<|special_58|>",
64
+ "<|special_59|>",
65
+ "<|special_60|>",
66
+ "<|special_61|>"
67
+ ],
68
  "bos_token": {
69
  "content": "<s>",
70
  "lstrip": false,
tokenizer.json CHANGED
@@ -29,6 +29,582 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "<|im_start|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
+ "content": "<|im_end|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 32002,
53
+ "content": "<|special_0|>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 32003,
62
+ "content": "<|special_1|>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 32004,
71
+ "content": "<|special_2|>",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 32005,
80
+ "content": "<|special_3|>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 32006,
89
+ "content": "<|special_4|>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 32007,
98
+ "content": "<|special_5|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 32008,
107
+ "content": "<|special_6|>",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 32009,
116
+ "content": "<|special_7|>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 32010,
125
+ "content": "<|special_8|>",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
+ },
132
+ {
133
+ "id": 32011,
134
+ "content": "<|special_9|>",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
+ },
141
+ {
142
+ "id": 32012,
143
+ "content": "<|special_10|>",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
+ },
150
+ {
151
+ "id": 32013,
152
+ "content": "<|special_11|>",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
+ },
159
+ {
160
+ "id": 32014,
161
+ "content": "<|special_12|>",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 32015,
170
+ "content": "<|special_13|>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 32016,
179
+ "content": "<|special_14|>",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
+ },
186
+ {
187
+ "id": 32017,
188
+ "content": "<|special_15|>",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
+ },
195
+ {
196
+ "id": 32018,
197
+ "content": "<|special_16|>",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
+ },
204
+ {
205
+ "id": 32019,
206
+ "content": "<|special_17|>",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 32020,
215
+ "content": "<|special_18|>",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 32021,
224
+ "content": "<|special_19|>",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 32022,
233
+ "content": "<|special_20|>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 32023,
242
+ "content": "<|special_21|>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 32024,
251
+ "content": "<|special_22|>",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 32025,
260
+ "content": "<|special_23|>",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 32026,
269
+ "content": "<|special_24|>",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 32027,
278
+ "content": "<|special_25|>",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 32028,
287
+ "content": "<|special_26|>",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 32029,
296
+ "content": "<|special_27|>",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 32030,
305
+ "content": "<|special_28|>",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
+ },
312
+ {
313
+ "id": 32031,
314
+ "content": "<|special_29|>",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
+ },
321
+ {
322
+ "id": 32032,
323
+ "content": "<|special_30|>",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
+ },
330
+ {
331
+ "id": 32033,
332
+ "content": "<|special_31|>",
333
+ "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
+ "normalized": false,
337
+ "special": true
338
+ },
339
+ {
340
+ "id": 32034,
341
+ "content": "<|special_32|>",
342
+ "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
+ "normalized": false,
346
+ "special": true
347
+ },
348
+ {
349
+ "id": 32035,
350
+ "content": "<|special_33|>",
351
+ "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
+ "normalized": false,
355
+ "special": true
356
+ },
357
+ {
358
+ "id": 32036,
359
+ "content": "<|special_34|>",
360
+ "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
+ "normalized": false,
364
+ "special": true
365
+ },
366
+ {
367
+ "id": 32037,
368
+ "content": "<|special_35|>",
369
+ "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
+ },
375
+ {
376
+ "id": 32038,
377
+ "content": "<|special_36|>",
378
+ "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
+ },
384
+ {
385
+ "id": 32039,
386
+ "content": "<|special_37|>",
387
+ "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
+ },
393
+ {
394
+ "id": 32040,
395
+ "content": "<|special_38|>",
396
+ "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
+ },
402
+ {
403
+ "id": 32041,
404
+ "content": "<|special_39|>",
405
+ "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
+ "normalized": false,
409
+ "special": true
410
+ },
411
+ {
412
+ "id": 32042,
413
+ "content": "<|special_40|>",
414
+ "single_word": false,
415
+ "lstrip": false,
416
+ "rstrip": false,
417
+ "normalized": false,
418
+ "special": true
419
+ },
420
+ {
421
+ "id": 32043,
422
+ "content": "<|special_41|>",
423
+ "single_word": false,
424
+ "lstrip": false,
425
+ "rstrip": false,
426
+ "normalized": false,
427
+ "special": true
428
+ },
429
+ {
430
+ "id": 32044,
431
+ "content": "<|special_42|>",
432
+ "single_word": false,
433
+ "lstrip": false,
434
+ "rstrip": false,
435
+ "normalized": false,
436
+ "special": true
437
+ },
438
+ {
439
+ "id": 32045,
440
+ "content": "<|special_43|>",
441
+ "single_word": false,
442
+ "lstrip": false,
443
+ "rstrip": false,
444
+ "normalized": false,
445
+ "special": true
446
+ },
447
+ {
448
+ "id": 32046,
449
+ "content": "<|special_44|>",
450
+ "single_word": false,
451
+ "lstrip": false,
452
+ "rstrip": false,
453
+ "normalized": false,
454
+ "special": true
455
+ },
456
+ {
457
+ "id": 32047,
458
+ "content": "<|special_45|>",
459
+ "single_word": false,
460
+ "lstrip": false,
461
+ "rstrip": false,
462
+ "normalized": false,
463
+ "special": true
464
+ },
465
+ {
466
+ "id": 32048,
467
+ "content": "<|special_46|>",
468
+ "single_word": false,
469
+ "lstrip": false,
470
+ "rstrip": false,
471
+ "normalized": false,
472
+ "special": true
473
+ },
474
+ {
475
+ "id": 32049,
476
+ "content": "<|special_47|>",
477
+ "single_word": false,
478
+ "lstrip": false,
479
+ "rstrip": false,
480
+ "normalized": false,
481
+ "special": true
482
+ },
483
+ {
484
+ "id": 32050,
485
+ "content": "<|special_48|>",
486
+ "single_word": false,
487
+ "lstrip": false,
488
+ "rstrip": false,
489
+ "normalized": false,
490
+ "special": true
491
+ },
492
+ {
493
+ "id": 32051,
494
+ "content": "<|special_49|>",
495
+ "single_word": false,
496
+ "lstrip": false,
497
+ "rstrip": false,
498
+ "normalized": false,
499
+ "special": true
500
+ },
501
+ {
502
+ "id": 32052,
503
+ "content": "<|special_50|>",
504
+ "single_word": false,
505
+ "lstrip": false,
506
+ "rstrip": false,
507
+ "normalized": false,
508
+ "special": true
509
+ },
510
+ {
511
+ "id": 32053,
512
+ "content": "<|special_51|>",
513
+ "single_word": false,
514
+ "lstrip": false,
515
+ "rstrip": false,
516
+ "normalized": false,
517
+ "special": true
518
+ },
519
+ {
520
+ "id": 32054,
521
+ "content": "<|special_52|>",
522
+ "single_word": false,
523
+ "lstrip": false,
524
+ "rstrip": false,
525
+ "normalized": false,
526
+ "special": true
527
+ },
528
+ {
529
+ "id": 32055,
530
+ "content": "<|special_53|>",
531
+ "single_word": false,
532
+ "lstrip": false,
533
+ "rstrip": false,
534
+ "normalized": false,
535
+ "special": true
536
+ },
537
+ {
538
+ "id": 32056,
539
+ "content": "<|special_54|>",
540
+ "single_word": false,
541
+ "lstrip": false,
542
+ "rstrip": false,
543
+ "normalized": false,
544
+ "special": true
545
+ },
546
+ {
547
+ "id": 32057,
548
+ "content": "<|special_55|>",
549
+ "single_word": false,
550
+ "lstrip": false,
551
+ "rstrip": false,
552
+ "normalized": false,
553
+ "special": true
554
+ },
555
+ {
556
+ "id": 32058,
557
+ "content": "<|special_56|>",
558
+ "single_word": false,
559
+ "lstrip": false,
560
+ "rstrip": false,
561
+ "normalized": false,
562
+ "special": true
563
+ },
564
+ {
565
+ "id": 32059,
566
+ "content": "<|special_57|>",
567
+ "single_word": false,
568
+ "lstrip": false,
569
+ "rstrip": false,
570
+ "normalized": false,
571
+ "special": true
572
+ },
573
+ {
574
+ "id": 32060,
575
+ "content": "<|special_58|>",
576
+ "single_word": false,
577
+ "lstrip": false,
578
+ "rstrip": false,
579
+ "normalized": false,
580
+ "special": true
581
+ },
582
+ {
583
+ "id": 32061,
584
+ "content": "<|special_59|>",
585
+ "single_word": false,
586
+ "lstrip": false,
587
+ "rstrip": false,
588
+ "normalized": false,
589
+ "special": true
590
+ },
591
+ {
592
+ "id": 32062,
593
+ "content": "<|special_60|>",
594
+ "single_word": false,
595
+ "lstrip": false,
596
+ "rstrip": false,
597
+ "normalized": false,
598
+ "special": true
599
+ },
600
+ {
601
+ "id": 32063,
602
+ "content": "<|special_61|>",
603
+ "single_word": false,
604
+ "lstrip": false,
605
+ "rstrip": false,
606
+ "normalized": false,
607
+ "special": true
608
  }
609
  ],
610
  "normalizer": {
tokenizer_config.json CHANGED
@@ -25,23 +25,597 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
30
- "additional_special_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "bos_token": "<s>",
32
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "legacy": true,
36
- "max_length": 2048,
37
  "model_max_length": 1000000000000000019884624838656,
38
  "pad_token": "<unk>",
 
39
  "sp_model_kwargs": {},
40
  "spaces_between_special_tokens": false,
41
- "stride": 0,
42
  "tokenizer_class": "LlamaTokenizer",
43
- "truncation_side": "right",
44
- "truncation_strategy": "longest_first",
45
  "unk_token": "<unk>",
46
  "use_default_system_prompt": false
47
  }
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
+ },
29
+ "32000": {
30
+ "content": "<|im_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32001": {
38
+ "content": "<|im_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "32002": {
46
+ "content": "<|special_0|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "32003": {
54
+ "content": "<|special_1|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "32004": {
62
+ "content": "<|special_2|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "32005": {
70
+ "content": "<|special_3|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "32006": {
78
+ "content": "<|special_4|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "32007": {
86
+ "content": "<|special_5|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "32008": {
94
+ "content": "<|special_6|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "32009": {
102
+ "content": "<|special_7|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "32010": {
110
+ "content": "<|special_8|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "32011": {
118
+ "content": "<|special_9|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "32012": {
126
+ "content": "<|special_10|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "32013": {
134
+ "content": "<|special_11|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "32014": {
142
+ "content": "<|special_12|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "32015": {
150
+ "content": "<|special_13|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "32016": {
158
+ "content": "<|special_14|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": true
164
+ },
165
+ "32017": {
166
+ "content": "<|special_15|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": true
172
+ },
173
+ "32018": {
174
+ "content": "<|special_16|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": true
180
+ },
181
+ "32019": {
182
+ "content": "<|special_17|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": true
188
+ },
189
+ "32020": {
190
+ "content": "<|special_18|>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": true
196
+ },
197
+ "32021": {
198
+ "content": "<|special_19|>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": true
204
+ },
205
+ "32022": {
206
+ "content": "<|special_20|>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": true
212
+ },
213
+ "32023": {
214
+ "content": "<|special_21|>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "32024": {
222
+ "content": "<|special_22|>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "32025": {
230
+ "content": "<|special_23|>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "32026": {
238
+ "content": "<|special_24|>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "32027": {
246
+ "content": "<|special_25|>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "32028": {
254
+ "content": "<|special_26|>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "32029": {
262
+ "content": "<|special_27|>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "32030": {
270
+ "content": "<|special_28|>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "32031": {
278
+ "content": "<|special_29|>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "32032": {
286
+ "content": "<|special_30|>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ },
293
+ "32033": {
294
+ "content": "<|special_31|>",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": true
300
+ },
301
+ "32034": {
302
+ "content": "<|special_32|>",
303
+ "lstrip": false,
304
+ "normalized": false,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": true
308
+ },
309
+ "32035": {
310
+ "content": "<|special_33|>",
311
+ "lstrip": false,
312
+ "normalized": false,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": true
316
+ },
317
+ "32036": {
318
+ "content": "<|special_34|>",
319
+ "lstrip": false,
320
+ "normalized": false,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": true
324
+ },
325
+ "32037": {
326
+ "content": "<|special_35|>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": true
332
+ },
333
+ "32038": {
334
+ "content": "<|special_36|>",
335
+ "lstrip": false,
336
+ "normalized": false,
337
+ "rstrip": false,
338
+ "single_word": false,
339
+ "special": true
340
+ },
341
+ "32039": {
342
+ "content": "<|special_37|>",
343
+ "lstrip": false,
344
+ "normalized": false,
345
+ "rstrip": false,
346
+ "single_word": false,
347
+ "special": true
348
+ },
349
+ "32040": {
350
+ "content": "<|special_38|>",
351
+ "lstrip": false,
352
+ "normalized": false,
353
+ "rstrip": false,
354
+ "single_word": false,
355
+ "special": true
356
+ },
357
+ "32041": {
358
+ "content": "<|special_39|>",
359
+ "lstrip": false,
360
+ "normalized": false,
361
+ "rstrip": false,
362
+ "single_word": false,
363
+ "special": true
364
+ },
365
+ "32042": {
366
+ "content": "<|special_40|>",
367
+ "lstrip": false,
368
+ "normalized": false,
369
+ "rstrip": false,
370
+ "single_word": false,
371
+ "special": true
372
+ },
373
+ "32043": {
374
+ "content": "<|special_41|>",
375
+ "lstrip": false,
376
+ "normalized": false,
377
+ "rstrip": false,
378
+ "single_word": false,
379
+ "special": true
380
+ },
381
+ "32044": {
382
+ "content": "<|special_42|>",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false,
387
+ "special": true
388
+ },
389
+ "32045": {
390
+ "content": "<|special_43|>",
391
+ "lstrip": false,
392
+ "normalized": false,
393
+ "rstrip": false,
394
+ "single_word": false,
395
+ "special": true
396
+ },
397
+ "32046": {
398
+ "content": "<|special_44|>",
399
+ "lstrip": false,
400
+ "normalized": false,
401
+ "rstrip": false,
402
+ "single_word": false,
403
+ "special": true
404
+ },
405
+ "32047": {
406
+ "content": "<|special_45|>",
407
+ "lstrip": false,
408
+ "normalized": false,
409
+ "rstrip": false,
410
+ "single_word": false,
411
+ "special": true
412
+ },
413
+ "32048": {
414
+ "content": "<|special_46|>",
415
+ "lstrip": false,
416
+ "normalized": false,
417
+ "rstrip": false,
418
+ "single_word": false,
419
+ "special": true
420
+ },
421
+ "32049": {
422
+ "content": "<|special_47|>",
423
+ "lstrip": false,
424
+ "normalized": false,
425
+ "rstrip": false,
426
+ "single_word": false,
427
+ "special": true
428
+ },
429
+ "32050": {
430
+ "content": "<|special_48|>",
431
+ "lstrip": false,
432
+ "normalized": false,
433
+ "rstrip": false,
434
+ "single_word": false,
435
+ "special": true
436
+ },
437
+ "32051": {
438
+ "content": "<|special_49|>",
439
+ "lstrip": false,
440
+ "normalized": false,
441
+ "rstrip": false,
442
+ "single_word": false,
443
+ "special": true
444
+ },
445
+ "32052": {
446
+ "content": "<|special_50|>",
447
+ "lstrip": false,
448
+ "normalized": false,
449
+ "rstrip": false,
450
+ "single_word": false,
451
+ "special": true
452
+ },
453
+ "32053": {
454
+ "content": "<|special_51|>",
455
+ "lstrip": false,
456
+ "normalized": false,
457
+ "rstrip": false,
458
+ "single_word": false,
459
+ "special": true
460
+ },
461
+ "32054": {
462
+ "content": "<|special_52|>",
463
+ "lstrip": false,
464
+ "normalized": false,
465
+ "rstrip": false,
466
+ "single_word": false,
467
+ "special": true
468
+ },
469
+ "32055": {
470
+ "content": "<|special_53|>",
471
+ "lstrip": false,
472
+ "normalized": false,
473
+ "rstrip": false,
474
+ "single_word": false,
475
+ "special": true
476
+ },
477
+ "32056": {
478
+ "content": "<|special_54|>",
479
+ "lstrip": false,
480
+ "normalized": false,
481
+ "rstrip": false,
482
+ "single_word": false,
483
+ "special": true
484
+ },
485
+ "32057": {
486
+ "content": "<|special_55|>",
487
+ "lstrip": false,
488
+ "normalized": false,
489
+ "rstrip": false,
490
+ "single_word": false,
491
+ "special": true
492
+ },
493
+ "32058": {
494
+ "content": "<|special_56|>",
495
+ "lstrip": false,
496
+ "normalized": false,
497
+ "rstrip": false,
498
+ "single_word": false,
499
+ "special": true
500
+ },
501
+ "32059": {
502
+ "content": "<|special_57|>",
503
+ "lstrip": false,
504
+ "normalized": false,
505
+ "rstrip": false,
506
+ "single_word": false,
507
+ "special": true
508
+ },
509
+ "32060": {
510
+ "content": "<|special_58|>",
511
+ "lstrip": false,
512
+ "normalized": false,
513
+ "rstrip": false,
514
+ "single_word": false,
515
+ "special": true
516
+ },
517
+ "32061": {
518
+ "content": "<|special_59|>",
519
+ "lstrip": false,
520
+ "normalized": false,
521
+ "rstrip": false,
522
+ "single_word": false,
523
+ "special": true
524
+ },
525
+ "32062": {
526
+ "content": "<|special_60|>",
527
+ "lstrip": false,
528
+ "normalized": false,
529
+ "rstrip": false,
530
+ "single_word": false,
531
+ "special": true
532
+ },
533
+ "32063": {
534
+ "content": "<|special_61|>",
535
+ "lstrip": false,
536
+ "normalized": false,
537
+ "rstrip": false,
538
+ "single_word": false,
539
+ "special": true
540
  }
541
  },
542
+ "additional_special_tokens": [
543
+ "<|im_start|>",
544
+ "<|im_end|>",
545
+ "<|special_0|>",
546
+ "<|special_1|>",
547
+ "<|special_2|>",
548
+ "<|special_3|>",
549
+ "<|special_4|>",
550
+ "<|special_5|>",
551
+ "<|special_6|>",
552
+ "<|special_7|>",
553
+ "<|special_8|>",
554
+ "<|special_9|>",
555
+ "<|special_10|>",
556
+ "<|special_11|>",
557
+ "<|special_12|>",
558
+ "<|special_13|>",
559
+ "<|special_14|>",
560
+ "<|special_15|>",
561
+ "<|special_16|>",
562
+ "<|special_17|>",
563
+ "<|special_18|>",
564
+ "<|special_19|>",
565
+ "<|special_20|>",
566
+ "<|special_21|>",
567
+ "<|special_22|>",
568
+ "<|special_23|>",
569
+ "<|special_24|>",
570
+ "<|special_25|>",
571
+ "<|special_26|>",
572
+ "<|special_27|>",
573
+ "<|special_28|>",
574
+ "<|special_29|>",
575
+ "<|special_30|>",
576
+ "<|special_31|>",
577
+ "<|special_32|>",
578
+ "<|special_33|>",
579
+ "<|special_34|>",
580
+ "<|special_35|>",
581
+ "<|special_36|>",
582
+ "<|special_37|>",
583
+ "<|special_38|>",
584
+ "<|special_39|>",
585
+ "<|special_40|>",
586
+ "<|special_41|>",
587
+ "<|special_42|>",
588
+ "<|special_43|>",
589
+ "<|special_44|>",
590
+ "<|special_45|>",
591
+ "<|special_46|>",
592
+ "<|special_47|>",
593
+ "<|special_48|>",
594
+ "<|special_49|>",
595
+ "<|special_50|>",
596
+ "<|special_51|>",
597
+ "<|special_52|>",
598
+ "<|special_53|>",
599
+ "<|special_54|>",
600
+ "<|special_55|>",
601
+ "<|special_56|>",
602
+ "<|special_57|>",
603
+ "<|special_58|>",
604
+ "<|special_59|>",
605
+ "<|special_60|>",
606
+ "<|special_61|>"
607
+ ],
608
  "bos_token": "<s>",
609
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
610
  "clean_up_tokenization_spaces": false,
611
  "eos_token": "</s>",
612
  "legacy": true,
 
613
  "model_max_length": 1000000000000000019884624838656,
614
  "pad_token": "<unk>",
615
+ "padding_side": "right",
616
  "sp_model_kwargs": {},
617
  "spaces_between_special_tokens": false,
 
618
  "tokenizer_class": "LlamaTokenizer",
 
 
619
  "unk_token": "<unk>",
620
  "use_default_system_prompt": false
621
  }