add model weights

Files changed (12) hide show

config.json +65 -0
merges.txt +0 -0
optimizer.pt +3 -0
pytorch_model.bin +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
trainer_state.json +156 -0
training_args.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "_name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/roberta-base-biomedical-clinical-es",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "multi_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.19.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 52000
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:319bd8b0b3fc3178d2d78724f5baa91dd85918dbcf71a4c47feafcde2ec91c89
+size 1008039837

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09eaddcf87a41588d71397076bdad7e758ea8e4e74269567a6e22ed43d76b437
+size 504033325

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8dcd8bd2facbcb38c978d6b2b10cbacd8e9f54834a513467b1196a471554682c
+size 14567

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:064f8a5f5dbc314d6628cb6104463ee1d75e86f55b483f7a798a2a982ed80516
+size 623

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/roberta-base-biomedical-clinical-es", "tokenizer_class": "RobertaTokenizer"}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,156 @@

+{
+  "best_metric": 0.7442957192288286,
+  "best_model_checkpoint": "./CARES/checkpoints/roberta-/checkpoint-3500",
+  "epoch": 49.29577464788732,
+  "global_step": 3500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 7.04,
+      "learning_rate": 4.647887323943662e-05,
+      "loss": 0.1802,
+      "step": 500
+    },
+    {
+      "epoch": 7.04,
+      "eval_loss": 0.1463283747434616,
+      "eval_macro_f1": 0.38060566625566594,
+      "eval_macro_precision": 0.5037224636955915,
+      "eval_macro_recall": 0.36676192196038643,
+      "eval_micro_f1": 0.7358149620527648,
+      "eval_micro_precision": 0.8249594813614263,
+      "eval_micro_recall": 0.6640574037834311,
+      "eval_runtime": 2.7054,
+      "eval_samples_per_second": 357.063,
+      "eval_steps_per_second": 11.459,
+      "step": 500
+    },
+    {
+      "epoch": 14.08,
+      "learning_rate": 4.295774647887324e-05,
+      "loss": 0.0484,
+      "step": 1000
+    },
+    {
+      "epoch": 14.08,
+      "eval_loss": 0.14033463597297668,
+      "eval_macro_f1": 0.6067981552982502,
+      "eval_macro_precision": 0.6486392263482452,
+      "eval_macro_recall": 0.5884363376224964,
+      "eval_micro_f1": 0.7908386662175816,
+      "eval_micro_precision": 0.8175487465181058,
+      "eval_micro_recall": 0.7658186562296151,
+      "eval_runtime": 2.6975,
+      "eval_samples_per_second": 358.108,
+      "eval_steps_per_second": 11.492,
+      "step": 1000
+    },
+    {
+      "epoch": 21.13,
+      "learning_rate": 3.943661971830986e-05,
+      "loss": 0.0179,
+      "step": 1500
+    },
+    {
+      "epoch": 21.13,
+      "eval_loss": 0.16313208639621735,
+      "eval_macro_f1": 0.6876414143457183,
+      "eval_macro_precision": 0.7316617517332842,
+      "eval_macro_recall": 0.7019290034007357,
+      "eval_micro_f1": 0.7892204042348412,
+      "eval_micro_precision": 0.7765151515151515,
+      "eval_micro_recall": 0.8023483365949119,
+      "eval_runtime": 2.6931,
+      "eval_samples_per_second": 358.699,
+      "eval_steps_per_second": 11.511,
+      "step": 1500
+    },
+    {
+      "epoch": 28.17,
+      "learning_rate": 3.5915492957746486e-05,
+      "loss": 0.0095,
+      "step": 2000
+    },
+    {
+      "epoch": 28.17,
+      "eval_loss": 0.1628233790397644,
+      "eval_macro_f1": 0.7285510286916194,
+      "eval_macro_precision": 0.8247103134528249,
+      "eval_macro_recall": 0.6865275313487541,
+      "eval_micro_f1": 0.8138990978950885,
+      "eval_micro_precision": 0.8342465753424657,
+      "eval_micro_recall": 0.7945205479452054,
+      "eval_runtime": 2.6912,
+      "eval_samples_per_second": 358.946,
+      "eval_steps_per_second": 11.519,
+      "step": 2000
+    },
+    {
+      "epoch": 35.21,
+      "learning_rate": 3.23943661971831e-05,
+      "loss": 0.0055,
+      "step": 2500
+    },
+    {
+      "epoch": 35.21,
+      "eval_loss": 0.1734953373670578,
+      "eval_macro_f1": 0.7421994453880427,
+      "eval_macro_precision": 0.8204432806900727,
+      "eval_macro_recall": 0.7088284387134485,
+      "eval_micro_f1": 0.8176722716782063,
+      "eval_micro_precision": 0.8266666666666667,
+      "eval_micro_recall": 0.8088714938030006,
+      "eval_runtime": 2.6929,
+      "eval_samples_per_second": 358.718,
+      "eval_steps_per_second": 11.512,
+      "step": 2500
+    },
+    {
+      "epoch": 42.25,
+      "learning_rate": 2.887323943661972e-05,
+      "loss": 0.0039,
+      "step": 3000
+    },
+    {
+      "epoch": 42.25,
+      "eval_loss": 0.1867484599351883,
+      "eval_macro_f1": 0.7349168384365183,
+      "eval_macro_precision": 0.7741277072189265,
+      "eval_macro_recall": 0.7306178466824944,
+      "eval_micro_f1": 0.8012924071082391,
+      "eval_micro_precision": 0.793854033290653,
+      "eval_micro_recall": 0.8088714938030006,
+      "eval_runtime": 2.6968,
+      "eval_samples_per_second": 358.208,
+      "eval_steps_per_second": 11.495,
+      "step": 3000
+    },
+    {
+      "epoch": 49.3,
+      "learning_rate": 2.535211267605634e-05,
+      "loss": 0.0032,
+      "step": 3500
+    },
+    {
+      "epoch": 49.3,
+      "eval_loss": 0.181275874376297,
+      "eval_macro_f1": 0.7442957192288286,
+      "eval_macro_precision": 0.7848363899123905,
+      "eval_macro_recall": 0.7360907675141599,
+      "eval_micro_f1": 0.8202794930126746,
+      "eval_micro_precision": 0.8173575129533679,
+      "eval_micro_recall": 0.8232224396607958,
+      "eval_runtime": 2.6934,
+      "eval_samples_per_second": 358.652,
+      "eval_steps_per_second": 11.51,
+      "step": 3500
+    }
+  ],
+  "max_steps": 7100,
+  "num_train_epochs": 100,
+  "total_flos": 2.877157614419203e+16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95ef9e04b1c54450ebb0e5d368ad5f45d662a1cb072d8e0e194111a56c16c7d5
+size 3247

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff