tasksource
/

ModernBERT-base-embed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "tasksource/ModernBERT-base-nli",
   "architectures": [
     "ModernBertModel"
   ],
@@ -10,116 +10,6 @@
   "classifier_bias": false,
   "classifier_dropout": 0.0,
   "classifier_pooling": "mean",
-  "classifiers_size": [
-    3,
-    2,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    6,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    3,
-    2,
-    4,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    2,
-    3,
-    2,
-    4,
-    3,
-    3,
-    3,
-    2,
-    3,
-    1,
-    2,
-    2,
-    3,
-    13,
-    2,
-    3,
-    2,
-    2,
-    3,
-    3,
-    2,
-    3,
-    3,
-    2,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    4,
-    3,
-    3,
-    2,
-    2,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    4,
-    3,
-    2,
-    2,
-    3
-  ],
   "cls_token_id": 50281,
   "decoder_bias": true,
   "deterministic_flash_attn": false,
@@ -130,23 +20,13 @@
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
   "hidden_size": 768,
-  "id2label": {
-    "0": "entailment",
-    "1": "neutral",
-    "2": "contradiction"
-  },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
   "intermediate_size": 1152,
-  "label2id": {
-    "contradiction": 2,
-    "entailment": 0,
-    "neutral": 1
-  },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
-  "max_position_embeddings": 2048,
   "mlp_bias": false,
   "mlp_dropout": 0.0,
   "model_type": "modernbert",
@@ -156,121 +36,10 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "problem_type": "single_label_classification",
   "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
-  "tasks": [
-    "glue/mnli",
-    "glue/qnli",
-    "glue/rte",
-    "glue/wnli",
-    "super_glue/boolq",
-    "super_glue/cb",
-    "anli/a1",
-    "anli/a2",
-    "anli/a3",
-    "sick/label",
-    "sick/entailment_AB",
-    "snli",
-    "scitail/snli_format",
-    "hans",
-    "WANLI",
-    "recast/recast_sentiment",
-    "recast/recast_verbcorner",
-    "recast/recast_ner",
-    "recast/recast_factuality",
-    "recast/recast_puns",
-    "recast/recast_kg_relations",
-    "recast/recast_verbnet",
-    "recast/recast_megaveridicality",
-    "probability_words_nli/usnli",
-    "probability_words_nli/reasoning_1hop",
-    "probability_words_nli/reasoning_2hop",
-    "nan-nli",
-    "nli_fever",
-    "breaking_nli",
-    "conj_nli",
-    "fracas",
-    "dialogue_nli",
-    "mpe",
-    "dnc",
-    "recast_white/fnplus",
-    "recast_white/sprl",
-    "recast_white/dpr",
-    "robust_nli/IS_CS",
-    "robust_nli/LI_LI",
-    "robust_nli/ST_WO",
-    "robust_nli/PI_SP",
-    "robust_nli/PI_CD",
-    "robust_nli/ST_SE",
-    "robust_nli/ST_NE",
-    "robust_nli/ST_LM",
-    "robust_nli_is_sd",
-    "robust_nli_li_ts",
-    "add_one_rte",
-    "cycic_classification",
-    "lingnli",
-    "monotonicity-entailment",
-    "scinli",
-    "naturallogic",
-    "syntactic-augmentation-nli",
-    "autotnli",
-    "defeasible-nli/atomic",
-    "defeasible-nli/snli",
-    "help-nli",
-    "nli-veridicality-transitivity",
-    "lonli",
-    "dadc-limit-nli",
-    "folio",
-    "tomi-nli",
-    "puzzte",
-    "temporal-nli",
-    "counterfactually-augmented-snli",
-    "cnli",
-    "boolq-natural-perturbations",
-    "equate",
-    "chaos-mnli-ambiguity",
-    "logiqa-2.0-nli",
-    "mindgames",
-    "ConTRoL-nli",
-    "logical-fallacy",
-    "conceptrules_v2",
-    "zero-shot-label-nli",
-    "scone",
-    "monli",
-    "SpaceNLI",
-    "propsegment/nli",
-    "SDOH-NLI",
-    "scifact_entailment",
-    "AdjectiveScaleProbe-nli",
-    "resnli",
-    "semantic_fragments_nli",
-    "dataset_train_nli",
-    "nlgraph",
-    "ruletaker",
-    "PARARULE-Plus",
-    "logical-entailment",
-    "nope",
-    "LogicNLI",
-    "contract-nli/contractnli_a/seg",
-    "contract-nli/contractnli_b/full",
-    "nli4ct_semeval2024",
-    "biosift-nli",
-    "SIGA-nli",
-    "FOL-nli",
-    "doc-nli",
-    "mctest-nli",
-    "natural-language-satisfiability",
-    "idioms-nli",
-    "lifecycle-entailment",
-    "MSciNLI",
-    "hover-3way/nli",
-    "seahorse_summarization_evaluation",
-    "babi_nli",
-    "gen_debiased_nli"
-  ],
   "torch_dtype": "float32",
   "transformers_version": "4.48.0.dev0",
   "vocab_size": 50368

 {
+  "_name_or_path": "answerdotai/ModernBERT-base",
   "architectures": [
     "ModernBertModel"
   ],
   "classifier_bias": false,
   "classifier_dropout": 0.0,
   "classifier_pooling": "mean",
   "cls_token_id": 50281,
   "decoder_bias": true,
   "deterministic_flash_attn": false,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
   "hidden_size": 768,
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
   "intermediate_size": 1152,
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
   "mlp_bias": false,
   "mlp_dropout": 0.0,
   "model_type": "modernbert",
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
   "torch_dtype": "float32",
   "transformers_version": "4.48.0.dev0",
   "vocab_size": 50368

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f3e4eaefdf2c3a2062d343e925bad3c10166870ec2854b3733a6381b7f465e8
 size 596070136

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6027ea97411457d92dc3a5048481e4fa6859dbaeda6ac805307229298cecbd9
 size 596070136

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 2048,
   "do_lower_case": false
 }

 {
+  "max_seq_length": 8192,
   "do_lower_case": false
 }

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 2048,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 8192,
     "strategy": "LongestFirst",
     "stride": 0
   },

tokenizer_config.json CHANGED Viewed

@@ -933,20 +933,13 @@
   "cls_token": "[CLS]",
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
-  "max_length": 2048,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

   "cls_token": "[CLS]",
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]"
 }