xiaoxiaolin
committed on
Upload folder using huggingface_hub
- checkpoint-1386/config.json +44 -0
- checkpoint-1386/model.safetensors +3 -0
- checkpoint-1386/optimizer.pt +3 -0
- checkpoint-1386/scheduler.pt +3 -0
- checkpoint-1386/special_tokens_map.json +37 -0
- checkpoint-1386/tokenizer.json +0 -0
- checkpoint-1386/tokenizer_config.json +62 -0
- checkpoint-1386/trainer_state.json +20 -0
- checkpoint-1386/training_args.bin +3 -0
- checkpoint-1386/vocab.txt +0 -0
- checkpoint-2772/config.json +44 -0
- checkpoint-2772/model.safetensors +3 -0
- checkpoint-2772/optimizer.pt +3 -0
- checkpoint-2772/scheduler.pt +3 -0
- checkpoint-2772/special_tokens_map.json +37 -0
- checkpoint-2772/tokenizer.json +0 -0
- checkpoint-2772/tokenizer_config.json +62 -0
- checkpoint-2772/trainer_state.json +20 -0
- checkpoint-2772/training_args.bin +3 -0
- checkpoint-2772/vocab.txt +0 -0
- checkpoint-4158/config.json +44 -0
- checkpoint-4158/model.safetensors +3 -0
- checkpoint-4158/optimizer.pt +3 -0
- checkpoint-4158/scheduler.pt +3 -0
- checkpoint-4158/special_tokens_map.json +37 -0
- checkpoint-4158/tokenizer.json +0 -0
- checkpoint-4158/tokenizer_config.json +62 -0
- checkpoint-4158/trainer_state.json +20 -0
- checkpoint-4158/training_args.bin +3 -0
- checkpoint-4158/vocab.txt +0 -0
- config.json +44 -0
- custom_info +6 -0
- model.safetensors +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +62 -0
- train_results.txt +3 -0
- trainer_state.json +28 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
checkpoint-1386/config.json
ADDED
@@ -0,0 +1,44 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}
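
For context (not part of the commit): the auto_map above points at remote code under Alibaba-NLP/new-impl, and model_type "new" is not a built-in transformers architecture, so loading this config or model requires trust_remote_code=True. A minimal sketch, assuming the repo is cloned locally; the path and usage are illustrative:

```python
# Minimal sketch, not from this repo: load the checkpoint via the remote code
# referenced in auto_map. trust_remote_code=True is required because
# model_type "new" is not a built-in transformers architecture.
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("./checkpoint-1386", trust_remote_code=True)
print(config.hidden_size, config.num_hidden_layers)  # 1024 24

# auto_map has no entry for the custom "NewModelForCL" training architecture,
# so AutoModel loads the base NewModel encoder from these weights; any extra
# head weights would be skipped with a warning.
model = AutoModel.from_pretrained("./checkpoint-1386", trust_remote_code=True)
```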
checkpoint-1386/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baa2f9f37635886e8087ed8ce28354f1f33f383c5ef0a10e44e9fba2341a9dd9
+size 1736561104
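
These three lines are a Git LFS pointer: the repository tracks only the object's sha256 and byte size, and the ~1.7 GB weight file itself lives in LFS storage. A sketch of checking a downloaded blob against the pointer; the local path is a placeholder:

```python
# Sketch: verify a downloaded LFS object against the pointer above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):  # stream in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

expected = "baa2f9f37635886e8087ed8ce28354f1f33f383c5ef0a10e44e9fba2341a9dd9"
assert sha256_of("checkpoint-1386/model.safetensors") == expected
```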
checkpoint-1386/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8918551c211a4ff3ee48477319be1b55802d36be7f56215ee89e8e286419cbd
+size 3473287493
checkpoint-1386/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fec90e458c13c25f2367b6f50581f24ffb0c6ccda4ea17f970f9f599b6978fb5
+size 627
checkpoint-1386/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
checkpoint-1386/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-1386/tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}
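
Aside from the length fields, this is a stock BERT WordPiece tokenizer (vocab_size 30522, ids 0/100/101/102/103 for the special tokens). Note that model_max_length (32768) exceeds the config's max_position_embeddings (8192), presumably relying on the ntk rope_scaling to stretch positions beyond the trained range. A small usage sketch; the local path is a placeholder:

```python
# Sketch: the tokenizer is a plain BertTokenizer, so no remote code is needed.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint-1386")
print(tok.model_max_length)                       # 32768
enc = tok("an example sentence", truncation=True, max_length=8000)
print(enc.input_ids[0], enc.input_ids[-1])        # 101 ([CLS]) ... 102 ([SEP])
```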
checkpoint-1386/trainer_state.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1386,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 4158,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}
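
The numbers here are internally consistent: 4158 max_steps over 3 num_train_epochs is 1386 optimizer steps per epoch, which is exactly where the three checkpoint directories sit. A one-liner check on values copied from the JSON above:

```python
# Consistency check on the trainer state (values copied from the JSON above).
max_steps, num_train_epochs = 4158, 3
steps_per_epoch = max_steps // num_train_epochs          # 1386
assert [steps_per_epoch * e for e in (1, 2, 3)] == [1386, 2772, 4158]
```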
checkpoint-1386/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53bcb467ae282416feede09e3300466752c00ba2834e3bae1945161f3c4e1224
+size 4091
checkpoint-1386/vocab.txt
ADDED
The diff for this file is too large to render.
checkpoint-2772/config.json
ADDED
@@ -0,0 +1,44 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}
checkpoint-2772/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd20ba8123a7a8bd0411a3003142d08aaf3c0232c21c46816da4957b2f7cc755
+size 1736561104
checkpoint-2772/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fc8459de21242dc97b82414338c2a2e9027e067e90c6ffe25158f3df32502c4
+size 3473287493
checkpoint-2772/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5427dd161d12c622d83829f2d6f852439eea5415f0014923bce1de2cf0d3df81
+size 627
checkpoint-2772/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
checkpoint-2772/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-2772/tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}
checkpoint-2772/trainer_state.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 2772,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 4158,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-2772/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53bcb467ae282416feede09e3300466752c00ba2834e3bae1945161f3c4e1224
+size 4091
checkpoint-2772/vocab.txt
ADDED
The diff for this file is too large to render.
checkpoint-4158/config.json
ADDED
@@ -0,0 +1,44 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}
checkpoint-4158/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9b7cca6a6a536a5b285cda3e8b63327ab4eadf696970ecf5ab9feffea59d165
+size 1736561104
checkpoint-4158/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0d162ab177522dab04b64d81f731454bd618e4f2cb83d14f9d4c07bd3642d55
+size 3473287493
checkpoint-4158/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69ef7ceaf09eb40f4d189be1b807098d880998e95f69fb50091e26f374f8ec6b
+size 627
checkpoint-4158/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
checkpoint-4158/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-4158/tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}
checkpoint-4158/trainer_state.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 4158,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 4158,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-4158/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53bcb467ae282416feede09e3300466752c00ba2834e3bae1945161f3c4e1224
+size 4091
checkpoint-4158/vocab.txt
ADDED
The diff for this file is too large to render.
config.json
ADDED
@@ -0,0 +1,44 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}
custom_info
ADDED
@@ -0,0 +1,6 @@
+epoch=0 step=462 progress=0.111 eval info {'sparsity_loss': 0.463, 'l1l2_ratio_z12': 21.317, 'l1l2_ratio_z13': 23.905, 'l1l2_ratio_z13_all': 25.735} train info {'sparsity_loss': 1.53, 'l1l2_ratio_z12': 22.945, 'l1l2_ratio_z13': 25.015, 'l1l2_ratio_z13_all': 25.555}
+epoch=0 step=924 progress=0.222 eval info {'sparsity_loss': 0.283, 'l1l2_ratio_z12': 20.568, 'l1l2_ratio_z13': 23.443, 'l1l2_ratio_z13_all': 25.733} train info {'sparsity_loss': 0.183, 'l1l2_ratio_z12': 21.32, 'l1l2_ratio_z13': 24.705, 'l1l2_ratio_z13_all': 25.737}
+epoch=1 step=462 progress=0.444 eval info {'sparsity_loss': 0.23, 'l1l2_ratio_z12': 17.986, 'l1l2_ratio_z13': 22.782, 'l1l2_ratio_z13_all': 25.554} train info {'sparsity_loss': 0.062, 'l1l2_ratio_z12': 19.674, 'l1l2_ratio_z13': 24.426, 'l1l2_ratio_z13_all': 25.656}
+epoch=1 step=924 progress=0.556 eval info {'sparsity_loss': 0.237, 'l1l2_ratio_z12': 16.011, 'l1l2_ratio_z13': 22.279, 'l1l2_ratio_z13_all': 25.553} train info {'sparsity_loss': 0.03, 'l1l2_ratio_z12': 17.807, 'l1l2_ratio_z13': 24.222, 'l1l2_ratio_z13_all': 25.601}
+epoch=2 step=462 progress=0.778 eval info {'sparsity_loss': 0.246, 'l1l2_ratio_z12': 14.56, 'l1l2_ratio_z13': 22.528, 'l1l2_ratio_z13_all': 25.587} train info {'sparsity_loss': 0.019, 'l1l2_ratio_z12': 16.258, 'l1l2_ratio_z13': 24.266, 'l1l2_ratio_z13_all': 25.601}
+epoch=2 step=924 progress=0.889 eval info {'sparsity_loss': 0.285, 'l1l2_ratio_z12': 13.884, 'l1l2_ratio_z13': 22.263, 'l1l2_ratio_z13_all': 25.6} train info {'sparsity_loss': 0.012, 'l1l2_ratio_z12': 15.213, 'l1l2_ratio_z13': 24.337, 'l1l2_ratio_z13_all': 25.647}
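
Reading these logs (an interpretation, not documented in the commit): sparsity_loss falls sharply and l1l2_ratio_z12 drops from ~22.9 to ~15.2 over training, while l1l2_ratio_z13_all stays near 25.6. If the names mean the L1/L2 norm ratio of intermediate embeddings z12/z13, a lower value indicates a sparser vector; for a dense 1024-dim Gaussian vector the expected ratio is sqrt(2 * 1024 / pi) ≈ 25.5, which matches the flat z13_all values. A sketch of that metric under this assumption:

```python
# Hypothetical reconstruction of the logged metric: the L1/L2 norm ratio of
# an embedding, a standard sparsity proxy. For dimension d it ranges from 1
# (one-hot, maximally sparse) up to sqrt(d) = 32 for d = 1024 (all-equal).
import math
import torch

def l1l2_ratio(z: torch.Tensor) -> float:
    # z: (batch, dim); per-row L1/L2 ratio, averaged over the batch
    return (z.abs().sum(dim=-1) / z.norm(dim=-1)).mean().item()

z = torch.randn(256, 1024)              # dense Gaussian baseline
print(l1l2_ratio(z))                    # ~25.5
print(math.sqrt(2 * 1024 / math.pi))    # 25.53, dense-Gaussian expectation
```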
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9b7cca6a6a536a5b285cda3e8b63327ab4eadf696970ecf5ab9feffea59d165
+size 1736561104
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}
train_results.txt
ADDED
@@ -0,0 +1,3 @@
+epoch = 3.0
+train_runtime = 17574.8484
+train_samples_per_second = 0.237
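
Back-of-envelope, derived from these numbers rather than stated anywhere in the commit: the run took roughly 4.9 hours and processed about 17574.8 * 0.237 ≈ 4165 samples across the 3 epochs, which against the 4158 optimizer steps suggests an effective batch size of about one sample per step.

```python
# Derived, not logged: totals implied by train_results.txt.
runtime_s, samples_per_s, total_steps = 17574.8484, 0.237, 4158
total_samples = runtime_s * samples_per_s    # ≈ 4165.2
print(total_samples / total_steps)           # ≈ 1.0 sample per optimizer step
```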
trainer_state.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 4158,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.0,
+      "step": 4158,
+      "total_flos": 0,
+      "train_runtime": 17574.8484,
+      "train_samples_per_second": 0.237
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4158,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53bcb467ae282416feede09e3300466752c00ba2834e3bae1945161f3c4e1224
+size 4091
vocab.txt
ADDED
The diff for this file is too large to render.