Add new SentenceTransformer model.
- 1_Pooling/config.json +3 -3
- README.md +55 -78
- config.json +31 -13
- model.safetensors +2 -2
- modules.json +0 -6
- sentence_bert_config.json +1 -1
- tokenizer.json +1 -1
- tokenizer_config.json +2 -4
1_Pooling/config.json
CHANGED
@@ -1,7 +1,7 @@
 {
-    "word_embedding_dimension":
-    "pooling_mode_cls_token":
-    "pooling_mode_mean_tokens":
+    "word_embedding_dimension": 768,
+    "pooling_mode_cls_token": true,
+    "pooling_mode_mean_tokens": false,
     "pooling_mode_max_tokens": false,
     "pooling_mode_mean_sqrt_len_tokens": false,
     "pooling_mode_weightedmean_tokens": false,
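The new pooling config switches the model to CLS-token pooling over 768-dimensional token embeddings. A minimal sketch of how that maps onto the sentence-transformers modules API, assuming sentence-transformers v3+ (the `trust_remote_code` flags are an assumption, needed because the base model ships custom modeling code):

```python
from sentence_transformers import SentenceTransformer, models

# Transformer module: the custom "NewModel" code requires trust_remote_code
transformer = models.Transformer(
    "Alibaba-NLP/gte-base-en-v1.5",
    max_seq_length=8192,
    model_args={"trust_remote_code": True},
    config_args={"trust_remote_code": True},
)
# Pooling module mirroring 1_Pooling/config.json above
pooling = models.Pooling(
    word_embedding_dimension=768,
    pooling_mode_cls_token=True,     # take the [CLS] embedding
    pooling_mode_mean_tokens=False,  # instead of mean pooling
)
model = SentenceTransformer(modules=[transformer, pooling])
```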
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model:
+base_model: Alibaba-NLP/gte-base-en-v1.5
 library_name: sentence-transformers
 pipeline_tag: sentence-similarity
 tags:
@@ -174,17 +174,17 @@ widget:
 - Thinks x = y is an axis
 ---
 
-# SentenceTransformer based on
+# SentenceTransformer based on Alibaba-NLP/gte-base-en-v1.5
 
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 
 ## Model Details
 
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [
-- **Maximum Sequence Length:**
-- **Output Dimensionality:**
+- **Base model:** [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) <!-- at revision a8e4f3e0ee719c75bc30d12b8eae0f8440502718 -->
+- **Maximum Sequence Length:** 8192 tokens
+- **Output Dimensionality:** 768 tokens
 - **Similarity Function:** Cosine Similarity
 - **Training Dataset:**
 - csv
@@ -201,9 +201,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length':
-  (1): Pooling({'word_embedding_dimension':
-  (2): Normalize()
+  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 )
 ```
@@ -231,7 +230,7 @@ sentences = [
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3,
+# [3, 768]
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
@@ -285,10 +284,10 @@ You can finetune this model on your own dataset.
 * Size: 12,210 training samples
 * Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text
-  |
-  | type    | string
-  | details | <ul><li>min: 54 tokens</li><li>mean:
+  |         | qa_pair_text | MisconceptionName | negative |
+  |:--------|:-------------|:------------------|:---------|
+  | type    | string | string | string |
+  | details | <ul><li>min: 54 tokens</li><li>mean: 124.3 tokens</li><li>max: 618 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.16 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 14.49 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
   | qa_pair_text | MisconceptionName | negative |
   |:-------------|:------------------|:---------|
@@ -311,10 +310,10 @@ You can finetune this model on your own dataset.
 * Size: 9,640 evaluation samples
 * Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text
-  |
-  | type    | string
-  | details | <ul><li>min: 56 tokens</li><li>mean:
+  |         | qa_pair_text | MisconceptionName | negative |
+  |:--------|:-------------|:------------------|:---------|
+  | type    | string | string | string |
+  | details | <ul><li>min: 56 tokens</li><li>mean: 123.29 tokens</li><li>max: 1092 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.51 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.86 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
   | qa_pair_text | MisconceptionName | negative |
   |:-------------|:------------------|:---------|
@@ -335,7 +334,7 @@ You can finetune this model on your own dataset.
 - `eval_strategy`: steps
 - `per_device_train_batch_size`: 32
 - `per_device_eval_batch_size`: 32
-- `gradient_accumulation_steps`:
+- `gradient_accumulation_steps`: 16
 - `learning_rate`: 1e-05
 - `weight_decay`: 0.01
 - `num_train_epochs`: 40
@@ -359,7 +358,7 @@ You can finetune this model on your own dataset.
 - `per_device_eval_batch_size`: 32
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
-- `gradient_accumulation_steps`:
+- `gradient_accumulation_steps`: 16
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
 - `learning_rate`: 1e-05
@@ -468,64 +467,42 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch | Step | Training Loss | loss |
 |:-----------:|:-------:|:-------------:|:----------:|
-| 0.5026 |
-| 1.0052 |
-| 1.4974 |
-| 2.0 |
-| 2.4921 |
-| 2.9948 |
-| 3.4869 |
-| 3.9895 |
-| 4.4817 |
-| 4.9843 |
-| 5.4764 |
-| 5.9791 |
-| 6.4712 |
-| 6.9738 |
-| 7.4660 |
-| 7.9686 |
-| 8.4607 |
-| 8.9634 |
-| 9.4555 |
-| 9.9581 |
-| 10.4503 |
-| 10.9529 |
-| 11.4450 |
-| 11.9476 |
-| 12.4398 |
-| 12.9424 |
-| 13.4346 |
-| 13.9372 |
-| 14.4293 |
-| 14.9319 |
-| 15.4241 |
-| 15.9267 |
-| 16.4188 |
-| 16.9215 |
-| 17.4136 |
-| 17.9162 |
-| 18.4084 | 444 | 0.6 | - |
-| 18.9110 | 456 | 0.6122 | 0.9706 |
-| 19.4031 | 468 | 0.5763 | - |
-| 19.9058 | 480 | 0.5787 | 0.9732 |
-| 20.3979 | 492 | 0.5432 | - |
-| 20.9005 | 504 | 0.5599 | 0.9618 |
-| 21.3927 | 516 | 0.5245 | - |
-| 21.8953 | 528 | 0.5278 | 0.9626 |
-| 22.3874 | 540 | 0.4989 | - |
-| 22.8901 | 552 | 0.509 | 0.9583 |
-| 23.3822 | 564 | 0.4674 | - |
-| **23.8848** | **576** | **0.4854** | **0.9573** |
-| 24.3770 | 588 | 0.4619 | - |
-| 24.8796 | 600 | 0.4631 | 0.9615 |
-| 25.3717 | 612 | 0.4339 | - |
-| 25.8743 | 624 | 0.4427 | 0.9593 |
-| 26.3665 | 636 | 0.4225 | - |
-| 26.8691 | 648 | 0.4245 | 0.9694 |
-| 27.3613 | 660 | 0.3936 | - |
-| 27.8639 | 672 | 0.4168 | 0.9586 |
-| 28.3560 | 684 | 0.3835 | - |
-| 28.8586 | 696 | 0.3921 | 0.9629 |
+| 0.5026 | 6 | 2.8901 | - |
+| 1.0052 | 12 | 2.5455 | 2.1423 |
+| 1.4974 | 18 | 2.2716 | - |
+| 2.0 | 24 | 2.0293 | 1.7440 |
+| 2.4921 | 30 | 1.8326 | - |
+| 2.9948 | 36 | 1.6703 | 1.4220 |
+| 3.4869 | 42 | 1.4876 | - |
+| 3.9895 | 48 | 1.3571 | 1.2232 |
+| 4.4817 | 54 | 1.2347 | - |
+| 4.9843 | 60 | 1.2289 | 1.1891 |
+| 5.4764 | 66 | 1.1551 | - |
+| 5.9791 | 72 | 1.0629 | 1.1069 |
+| 6.4712 | 78 | 1.0166 | - |
+| 6.9738 | 84 | 1.0095 | 1.0651 |
+| 7.4660 | 90 | 0.8951 | - |
+| 7.9686 | 96 | 0.8782 | 1.0386 |
+| 8.4607 | 102 | 0.8305 | - |
+| 8.9634 | 108 | 0.809 | 1.0174 |
+| 9.4555 | 114 | 0.7202 | - |
+| 9.9581 | 120 | 0.7403 | 1.0041 |
+| 10.4503 | 126 | 0.6737 | - |
+| 10.9529 | 132 | 0.6499 | 0.9903 |
+| 11.4450 | 138 | 0.6149 | - |
+| 11.9476 | 144 | 0.6185 | 0.9889 |
+| 12.4398 | 150 | 0.5492 | - |
+| **12.9424** | **156** | **0.5595** | **0.9878** |
+| 13.4346 | 162 | 0.5146 | - |
+| 13.9372 | 168 | 0.5097 | 0.9927 |
+| 14.4293 | 174 | 0.4584 | - |
+| 14.9319 | 180 | 0.4746 | 0.9912 |
+| 15.4241 | 186 | 0.4331 | - |
+| 15.9267 | 192 | 0.424 | 1.0016 |
+| 16.4188 | 198 | 0.3946 | - |
+| 16.9215 | 204 | 0.4077 | 1.0002 |
+| 17.4136 | 210 | 0.366 | - |
+| 17.9162 | 216 | 0.3721 | 1.0070 |
 
 * The bold row denotes the saved checkpoint.
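The README's usage snippet appears above only as diff fragments; stitched together, the flow is roughly the following. The repo id and sentences are placeholders, and `trust_remote_code=True` is an assumption required by the custom NewModel architecture:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("your-username/your-model", trust_remote_code=True)

sentences = [
    "First example sentence.",
    "Second example sentence.",
    "Third example sentence.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (3, 768)

# Cosine similarity between all pairs of embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)  # (3, 3)
```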
config.json
CHANGED
@@ -1,26 +1,44 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "Alibaba-NLP/gte-base-en-v1.5",
   "architectures": [
-    "
+    "NewModel"
   ],
-  "attention_probs_dropout_prob": 0.
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
   "classifier_dropout": null,
-  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size":
+  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size":
+  "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
-  "
-  "
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
   "num_attention_heads": 12,
-  "num_hidden_layers":
+  "num_hidden_layers": 12,
+  "pack_qkv": true,
   "pad_token_id": 0,
-  "position_embedding_type": "
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 500000,
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
-  "type_vocab_size":
-  "
-  "
+  "type_vocab_size": 0,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30528
 }
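The new `auto_map` block points at remote code in Alibaba-NLP/new-impl, and `"model_type": "new"` is not an architecture registered in transformers, so loading outside sentence-transformers needs `trust_remote_code=True`. A hedged loading sketch (placeholder repo id):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-username/your-model")
# Without trust_remote_code, AutoConfig fails on the unregistered "new" model type
model = AutoModel.from_pretrained("your-username/your-model", trust_remote_code=True)
```

The `rope_scaling` entry (NTK scaling, factor 2.0) works together with `rope_theta` to support the 8192-token `max_position_embeddings` that the rest of this commit assumes.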
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e9a3cff2df37b85a7a0b3b7953d156029625c6b2ee5b6ed8bea9e8f144b64982
+size 547119128
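As a quick sanity check, the new weight file's size is consistent with a float32 checkpoint of the ~137M-parameter base model:

```python
# 547,119,128 bytes at 4 bytes per float32 weight
print(547_119_128 / 4 / 1e6)  # ≈ 136.8M parameters, in line with gte-base-en-v1.5
```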
modules.json
CHANGED
@@ -10,11 +10,5 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
-  },
-  {
-    "idx": 2,
-    "name": "2",
-    "path": "2_Normalize",
-    "type": "sentence_transformers.models.Normalize"
   }
 ]
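This change removes the Normalize module, so embeddings from the new checkpoint are no longer unit-length by default. Cosine similarity (the card's similarity function) is unaffected, but dot-product consumers may want to normalize at encode time; a sketch, with a placeholder repo id:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("your-username/your-model", trust_remote_code=True)
# Restores unit-length output, standing in for the removed Normalize module
embeddings = model.encode(["Thinks x = y is an axis"], normalize_embeddings=True)
```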
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length":
+  "max_seq_length": 8192,
   "do_lower_case": false
 }
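`max_seq_length: 8192` is the ceiling SentenceTransformer pads and truncates to. Since the longest evaluation sample above is 1092 tokens, the limit can be lowered at load time to cut memory use; a sketch (placeholder repo id):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("your-username/your-model", trust_remote_code=True)
print(model.max_seq_length)  # 8192, read from sentence_bert_config.json
model.max_seq_length = 2048  # optional cap; longest train/eval sample is 1092 tokens
```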
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length":
+    "max_length": 8192,
     "strategy": "LongestFirst",
     "stride": 0
   },
tokenizer_config.json
CHANGED
@@ -43,12 +43,10 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "max_length":
-  "model_max_length":
-  "never_split": null,
+  "max_length": 512,
+  "model_max_length": 8192,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,
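Note that the two surviving limits disagree: `max_length` stays at 512 while `model_max_length` becomes 8192. When tokenizing outside sentence-transformers, passing `max_length` explicitly sidesteps any ambiguity about which stored default applies; a sketch (placeholder repo id):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-username/your-model")
encoded = tokenizer(
    "a long qa_pair_text ...",
    truncation=True,
    max_length=8192,  # explicit, rather than relying on the stored defaults
)
```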