{
  "tokenizer_name": "AutoTokenizer",
  "pretrained_model_name": "AutoModel",
  "vocab": {
    "vocab_size": 30522,
    "model_max_length": 512,
    "padding_side": "right",
    "truncation_side": "right",
    "special_tokens": {
      "pad_token": "[PAD]",
      "unk_token": "[UNK]",
      "cls_token": "[CLS]",
      "sep_token": "[SEP]",
      "mask_token": "[MASK]"
    },
    "tokenizer_type": "WordPiece",
    "lowercase": true,
    "pad_token_id": 0,
    "unk_token_id": 100,
    "cls_token_id": 101,
    "sep_token_id": 102,
    "mask_token_id": 103
  },
  "normalization": {
    "lowercase": true,
    "strip_accents": true
  },
  "preprocessing": {
    "do_lower_case": true,
    "handle_chinese_chars": true
  }
}