Gurveer05 committed
Commit 8f94303 · verified · 1 Parent(s): 95dcba8

Add new SentenceTransformer model.

1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "word_embedding_dimension": 768,
-  "pooling_mode_cls_token": true,
-  "pooling_mode_mean_tokens": false,
+  "word_embedding_dimension": 384,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
   "pooling_mode_max_tokens": false,
   "pooling_mode_mean_sqrt_len_tokens": false,
   "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: Alibaba-NLP/gte-base-en-v1.5
+base_model: sentence-transformers/all-MiniLM-L6-v2
 library_name: sentence-transformers
 pipeline_tag: sentence-similarity
 tags:
@@ -174,17 +174,17 @@ widget:
 - Thinks x = y is an axis
 ---
 
-# SentenceTransformer based on Alibaba-NLP/gte-base-en-v1.5
+# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
 
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the csv dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 
 ## Model Details
 
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) <!-- at revision a8e4f3e0ee719c75bc30d12b8eae0f8440502718 -->
-- **Maximum Sequence Length:** 8192 tokens
-- **Output Dimensionality:** 768 tokens
+- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision 8b3219a92973c328a8e22fadcfa821b5dc75636a -->
+- **Maximum Sequence Length:** 256 tokens
+- **Output Dimensionality:** 384 tokens
 - **Similarity Function:** Cosine Similarity
 - **Training Dataset:**
     - csv
@@ -201,8 +201,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
-  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
+  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
 )
 ```
 
@@ -230,7 +231,7 @@ sentences = [
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3, 768]
+# [3, 384]
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
@@ -284,10 +285,10 @@ You can finetune this model on your own dataset.
 * Size: 12,210 training samples
 * Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text | MisconceptionName | negative |
-  |:--------|:-------------|:------------------|:---------|
-  | type    | string       | string            | string   |
-  | details | <ul><li>min: 54 tokens</li><li>mean: 124.3 tokens</li><li>max: 618 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.16 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 14.49 tokens</li><li>max: 40 tokens</li></ul> |
+  |         | qa_pair_text | MisconceptionName | negative |
+  |:--------|:-------------|:------------------|:---------|
+  | type    | string       | string            | string   |
+  | details | <ul><li>min: 54 tokens</li><li>mean: 121.45 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.16 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 14.49 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
   | qa_pair_text | MisconceptionName | negative |
   |:-------------|:------------------|:---------|
@@ -310,10 +311,10 @@ You can finetune this model on your own dataset.
 * Size: 9,640 evaluation samples
 * Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text | MisconceptionName | negative |
-  |:--------|:-------------|:------------------|:---------|
-  | type    | string       | string            | string   |
-  | details | <ul><li>min: 56 tokens</li><li>mean: 123.29 tokens</li><li>max: 1092 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.51 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.86 tokens</li><li>max: 40 tokens</li></ul> |
+  |         | qa_pair_text | MisconceptionName | negative |
+  |:--------|:-------------|:------------------|:---------|
+  | type    | string       | string            | string   |
+  | details | <ul><li>min: 56 tokens</li><li>mean: 119.35 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.51 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.86 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
   | qa_pair_text | MisconceptionName | negative |
   |:-------------|:------------------|:---------|
@@ -334,12 +335,12 @@ You can finetune this model on your own dataset.
 - `eval_strategy`: steps
 - `per_device_train_batch_size`: 32
 - `per_device_eval_batch_size`: 32
-- `gradient_accumulation_steps`: 32
+- `gradient_accumulation_steps`: 8
 - `learning_rate`: 1e-05
 - `weight_decay`: 0.01
-- `num_train_epochs`: 20
+- `num_train_epochs`: 40
 - `lr_scheduler_type`: cosine
-- `lr_scheduler_kwargs`: {'num_cycles': 10}
+- `lr_scheduler_kwargs`: {'num_cycles': 20}
 - `warmup_ratio`: 0.1
 - `fp16`: True
 - `load_best_model_at_end`: True
@@ -358,7 +359,7 @@ You can finetune this model on your own dataset.
 - `per_device_eval_batch_size`: 32
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
-- `gradient_accumulation_steps`: 32
+- `gradient_accumulation_steps`: 8
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
 - `learning_rate`: 1e-05
@@ -367,10 +368,10 @@ You can finetune this model on your own dataset.
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`: 20
+- `num_train_epochs`: 40
 - `max_steps`: -1
 - `lr_scheduler_type`: cosine
-- `lr_scheduler_kwargs`: {'num_cycles': 10}
+- `lr_scheduler_kwargs`: {'num_cycles': 20}
 - `warmup_ratio`: 0.1
 - `warmup_steps`: 0
 - `log_level`: passive
@@ -465,41 +466,66 @@ You can finetune this model on your own dataset.
 </details>
 
 ### Training Logs
-| Epoch | Step | Training Loss | loss |
-|:-----------:|:------:|:-------------:|:----------:|
-| 0.5026 | 3 | 2.9133 | - |
-| 1.0052 | 6 | 2.5832 | 2.1410 |
-| 1.4974 | 9 | 2.2895 | - |
-| 2.0 | 12 | 2.0454 | 1.7594 |
-| 2.4921 | 15 | 1.8939 | - |
-| 2.9948 | 18 | 1.8752 | 1.6653 |
-| 3.4869 | 21 | 1.7731 | - |
-| 3.9895 | 24 | 1.6771 | 1.4987 |
-| 4.4817 | 27 | 1.6388 | - |
-| 4.9843 | 30 | 1.5924 | 1.3795 |
-| 5.4764 | 33 | 1.4895 | - |
-| 5.9791 | 36 | 1.4837 | 1.3370 |
-| 6.4712 | 39 | 1.4183 | - |
-| 6.9738 | 42 | 1.3677 | 1.2660 |
-| 7.4660 | 45 | 1.3165 | - |
-| 7.9686 | 48 | 1.3034 | 1.2091 |
-| 8.4607 | 51 | 1.199 | - |
-| 8.9634 | 54 | 1.2276 | 1.1851 |
-| 9.4555 | 57 | 1.1421 | - |
-| 9.9581 | 60 | 1.1234 | 1.1398 |
-| 10.4503 | 63 | 1.0703 | - |
-| 10.9529 | 66 | 1.0716 | 1.1000 |
-| 11.4450 | 69 | 0.9864 | - |
-| 11.9476 | 72 | 1.0047 | 1.0839 |
-| 12.4398 | 75 | 0.9381 | - |
-| 12.9424 | 78 | 0.9298 | 1.0559 |
-| 13.4346 | 81 | 0.8725 | - |
-| 13.9372 | 84 | 0.8813 | 1.0333 |
-| 14.4293 | 87 | 0.7988 | - |
-| 14.9319 | 90 | 0.8256 | 1.0245 |
-| 15.4241 | 93 | 0.7617 | - |
-| **15.9267** | **96** | **0.7551** | **1.0121** |
-| 16.4188 | 99 | 0.713 | - |
+| Epoch | Step | Training Loss | loss |
+|:-----------:|:-------:|:-------------:|:----------:|
+| 0.5026 | 12 | 2.2789 | - |
+| 1.0052 | 24 | 2.1642 | 1.9746 |
+| 1.4974 | 36 | 2.0463 | - |
+| 2.0 | 48 | 1.8955 | 1.6808 |
+| 2.4921 | 60 | 1.7692 | - |
+| 2.9948 | 72 | 1.6528 | 1.4532 |
+| 3.4869 | 84 | 1.5298 | - |
+| 3.9895 | 96 | 1.4338 | 1.2853 |
+| 4.4817 | 108 | 1.3374 | - |
+| 4.9843 | 120 | 1.3084 | 1.2465 |
+| 5.4764 | 132 | 1.2921 | - |
+| 5.9791 | 144 | 1.2143 | 1.1766 |
+| 6.4712 | 156 | 1.1689 | - |
+| 6.9738 | 168 | 1.1656 | 1.1518 |
+| 7.4660 | 180 | 1.1172 | - |
+| 7.9686 | 192 | 1.0737 | 1.1080 |
+| 8.4607 | 204 | 1.0373 | - |
+| 8.9634 | 216 | 1.0445 | 1.0874 |
+| 9.4555 | 228 | 0.9707 | - |
+| 9.9581 | 240 | 0.9644 | 1.0649 |
+| 10.4503 | 252 | 0.9252 | - |
+| 10.9529 | 264 | 0.9211 | 1.0367 |
+| 11.4450 | 276 | 0.8645 | - |
+| 11.9476 | 288 | 0.8635 | 1.0297 |
+| 12.4398 | 300 | 0.8279 | - |
+| 12.9424 | 312 | 0.819 | 1.0161 |
+| 13.4346 | 324 | 0.7684 | - |
+| 13.9372 | 336 | 0.7842 | 1.0016 |
+| 14.4293 | 348 | 0.7448 | - |
+| 14.9319 | 360 | 0.7321 | 0.9951 |
+| 15.4241 | 372 | 0.7064 | - |
+| 15.9267 | 384 | 0.7161 | 0.9835 |
+| 16.4188 | 396 | 0.6692 | - |
+| 16.9215 | 408 | 0.6594 | 0.9774 |
+| 17.4136 | 420 | 0.6405 | - |
+| 17.9162 | 432 | 0.638 | 0.9723 |
+| 18.4084 | 444 | 0.6 | - |
+| 18.9110 | 456 | 0.6122 | 0.9706 |
+| 19.4031 | 468 | 0.5763 | - |
+| 19.9058 | 480 | 0.5787 | 0.9732 |
+| 20.3979 | 492 | 0.5432 | - |
+| 20.9005 | 504 | 0.5599 | 0.9618 |
+| 21.3927 | 516 | 0.5245 | - |
+| 21.8953 | 528 | 0.5278 | 0.9626 |
+| 22.3874 | 540 | 0.4989 | - |
+| 22.8901 | 552 | 0.509 | 0.9583 |
+| 23.3822 | 564 | 0.4674 | - |
+| **23.8848** | **576** | **0.4854** | **0.9573** |
+| 24.3770 | 588 | 0.4619 | - |
+| 24.8796 | 600 | 0.4631 | 0.9615 |
+| 25.3717 | 612 | 0.4339 | - |
+| 25.8743 | 624 | 0.4427 | 0.9593 |
+| 26.3665 | 636 | 0.4225 | - |
+| 26.8691 | 648 | 0.4245 | 0.9694 |
+| 27.3613 | 660 | 0.3936 | - |
+| 27.8639 | 672 | 0.4168 | 0.9586 |
+| 28.3560 | 684 | 0.3835 | - |
+| 28.8586 | 696 | 0.3921 | 0.9629 |
 
 * The bold row denotes the saved checkpoint.
 
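The card's updated usage example can be sanity-checked end to end. A minimal sketch, assuming `sentence-transformers` is installed; the repo id below is a placeholder, since the model's actual id does not appear in this diff:

```python
from sentence_transformers import SentenceTransformer
import numpy as np

# Placeholder id — substitute this model's actual repo id.
model = SentenceTransformer("Gurveer05/placeholder-model")

sentences = ["Thinks x = y is an axis", "Confuses the x and y axes"]
embeddings = model.encode(sentences)

print(embeddings.shape)  # (2, 384) after this commit, down from (2, 768)
# The new Normalize module returns unit-length vectors, so cosine
# similarity coincides with the dot product:
print(np.linalg.norm(embeddings, axis=1))        # ~ [1.0, 1.0]
print(model.similarity(embeddings, embeddings))  # 2x2 similarity matrix
```
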
config.json CHANGED
@@ -1,44 +1,26 @@
 {
-  "_name_or_path": "Alibaba-NLP/gte-base-en-v1.5",
+  "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
   "architectures": [
-    "NewModel"
+    "BertModel"
   ],
-  "attention_probs_dropout_prob": 0.0,
-  "auto_map": {
-    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
-    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
-    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
-    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
-    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
-    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
-    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
-  },
+  "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "hidden_size": 384,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
+  "intermediate_size": 1536,
   "layer_norm_eps": 1e-12,
-  "layer_norm_type": "layer_norm",
-  "logn_attention_clip1": false,
-  "logn_attention_scale": false,
-  "max_position_embeddings": 8192,
-  "model_type": "new",
+  "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pack_qkv": true,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
-  "position_embedding_type": "rope",
-  "rope_scaling": {
-    "factor": 2.0,
-    "type": "ntk"
-  },
-  "rope_theta": 500000,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
-  "type_vocab_size": 0,
-  "unpad_inputs": false,
-  "use_memory_efficient_attention": false,
-  "vocab_size": 30528
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d10283288ad7a6ce66a5175fb51467f4d3b6f939ee20bd89479c7bab36286f8
-size 547119128
+oid sha256:3dc81fcda60ac280c966d16b9cc07ebe8f5e13619f4caa4425c7f6e5dd344a91
+size 90864192
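
The LFS pointer sizes line up with the architecture change: at four bytes per float32 parameter, the byte counts above imply roughly 137M parameters for gte-base-en-v1.5 and 22.7M for all-MiniLM-L6-v2 (the safetensors header adds a little overhead). A quick back-of-the-envelope check:

```python
# Approximate parameter counts implied by the checkpoint sizes above,
# assuming float32 (4 bytes per weight) plus a small safetensors header.
old_bytes = 547_119_128
new_bytes = 90_864_192
print(f"old: ~{old_bytes / 4 / 1e6:.1f}M params")  # ~136.8M (gte-base-en-v1.5)
print(f"new: ~{new_bytes / 4 / 1e6:.1f}M params")  # ~22.7M (all-MiniLM-L6-v2)
```
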
modules.json CHANGED
@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]
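
The new third module L2-normalizes the pooled embedding; `sentence_transformers.models.Normalize` is equivalent to a single functional call. A sketch of the operation it appends:

```python
import torch
import torch.nn.functional as F

pooled = torch.randn(2, 384)            # output of the Pooling module
unit = F.normalize(pooled, p=2, dim=1)  # what the Normalize module applies

print(unit.norm(dim=1))  # tensor([1., 1.]) — unit-length embeddings
# On unit vectors, cosine similarity is just the dot product:
print(unit @ unit.T)
```
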
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 8192,
+  "max_seq_length": 256,
   "do_lower_case": false
 }
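
Cutting `max_seq_length` from 8192 to 256 means longer inputs are now truncated, which is why the dataset statistics in the new card cap out at exactly 256 tokens (previously 618 and 1092). A quick way to observe the truncation, assuming the base model's tokenizer:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
ids = tok("word " * 1000, truncation=True, max_length=256)["input_ids"]
print(len(ids))  # 256 — tokens beyond max_seq_length are dropped
```
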
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 8192,
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
tokenizer_config.json CHANGED
@@ -43,10 +43,12 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "max_length": 512,
-  "model_max_length": 8192,
+  "max_length": 128,
+  "model_max_length": 256,
+  "never_split": null,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,