Thinh Truong
committed on
Commit
·
447dfe2
1
Parent(s):
73ff1ab
update model files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +56 -0
- all_results.json +14 -0
- config.json +38 -0
- eval_results.json +9 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- roberta2-base-mnli-negnli/README.md +56 -0
- roberta2-base-mnli-negnli/all_results.json +14 -0
- roberta2-base-mnli-negnli/checkpoint-500/config.json +38 -0
- roberta2-base-mnli-negnli/checkpoint-500/merges.txt +0 -0
- roberta2-base-mnli-negnli/checkpoint-500/optimizer.pt +3 -0
- roberta2-base-mnli-negnli/checkpoint-500/pytorch_model.bin +3 -0
- roberta2-base-mnli-negnli/checkpoint-500/rng_state.pth +3 -0
- roberta2-base-mnli-negnli/checkpoint-500/scheduler.pt +3 -0
- roberta2-base-mnli-negnli/checkpoint-500/special_tokens_map.json +15 -0
- roberta2-base-mnli-negnli/checkpoint-500/tokenizer.json +0 -0
- roberta2-base-mnli-negnli/checkpoint-500/tokenizer_config.json +16 -0
- roberta2-base-mnli-negnli/checkpoint-500/trainer_state.json +22 -0
- roberta2-base-mnli-negnli/checkpoint-500/training_args.bin +3 -0
- roberta2-base-mnli-negnli/checkpoint-500/vocab.json +0 -0
- roberta2-base-mnli-negnli/config.json +38 -0
- roberta2-base-mnli-negnli/eval_results.json +9 -0
- roberta2-base-mnli-negnli/merges.txt +0 -0
- roberta2-base-mnli-negnli/pytorch_model.bin +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/1657201850.622683/events.out.tfevents.1657201850.doe.63764.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/events.out.tfevents.1657201850.doe.63764.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/events.out.tfevents.1657201943.doe.63764.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/1657202061.6067924/events.out.tfevents.1657202061.doe.2632.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/events.out.tfevents.1657202061.doe.2632.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/events.out.tfevents.1657202129.doe.2632.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/1657205852.4321375/events.out.tfevents.1657205852.doe.4547.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/events.out.tfevents.1657205852.doe.4547.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/events.out.tfevents.1657206008.doe.4547.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/1657206517.7561595/events.out.tfevents.1657206517.doe.16385.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/events.out.tfevents.1657206517.doe.16385.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/events.out.tfevents.1657206612.doe.16385.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/1657206761.9519951/events.out.tfevents.1657206761.doe.20937.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/events.out.tfevents.1657206761.doe.20937.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/events.out.tfevents.1657206857.doe.20937.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/1657206941.3139427/events.out.tfevents.1657206941.doe.25921.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/events.out.tfevents.1657206941.doe.25921.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/events.out.tfevents.1657207104.doe.25921.2 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/1657207371.2546418/events.out.tfevents.1657207371.doe.33741.1 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/events.out.tfevents.1657207371.doe.33741.0 +3 -0
- roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/events.out.tfevents.1657207536.doe.33741.2 +3 -0
- roberta2-base-mnli-negnli/special_tokens_map.json +15 -0
- roberta2-base-mnli-negnli/tokenizer.json +0 -0
- roberta2-base-mnli-negnli/tokenizer_config.json +16 -0
- roberta2-base-mnli-negnli/train_results.json +8 -0
- roberta2-base-mnli-negnli/trainer_state.json +31 -0
README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
tags:
|
4 |
+
- generated_from_trainer
|
5 |
+
metrics:
|
6 |
+
- accuracy
|
7 |
+
model-index:
|
8 |
+
- name: roberta2-base-mnli-negnli
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# roberta2-base-mnli-negnli
|
16 |
+
|
17 |
+
This model is a fine-tuned version of [sileod/roberta-base-mnli](https://huggingface.co/sileod/roberta-base-mnli) on an unknown dataset.
|
18 |
+
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 0.8397
|
20 |
+
- Accuracy: 0.8400
|
21 |
+
|
22 |
+
## Model description
|
23 |
+
|
24 |
+
More information needed
|
25 |
+
|
26 |
+
## Intended uses & limitations
|
27 |
+
|
28 |
+
More information needed
|
29 |
+
|
30 |
+
## Training and evaluation data
|
31 |
+
|
32 |
+
More information needed
|
33 |
+
|
34 |
+
## Training procedure
|
35 |
+
|
36 |
+
### Training hyperparameters
|
37 |
+
|
38 |
+
The following hyperparameters were used during training:
|
39 |
+
- learning_rate: 2e-05
|
40 |
+
- train_batch_size: 8
|
41 |
+
- eval_batch_size: 8
|
42 |
+
- seed: 42
|
43 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
44 |
+
- lr_scheduler_type: linear
|
45 |
+
- num_epochs: 5.0
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
### Framework versions
|
52 |
+
|
53 |
+
- Transformers 4.21.0.dev0
|
54 |
+
- Pytorch 1.8.0
|
55 |
+
- Datasets 1.18.3
|
56 |
+
- Tokenizers 0.12.1
|
all_results.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_accuracy": 0.8399999737739563,
|
4 |
+
"eval_loss": 0.8397448658943176,
|
5 |
+
"eval_runtime": 1.7729,
|
6 |
+
"eval_samples": 300,
|
7 |
+
"eval_samples_per_second": 169.214,
|
8 |
+
"eval_steps_per_second": 21.434,
|
9 |
+
"train_loss": 0.444431884765625,
|
10 |
+
"train_runtime": 158.4081,
|
11 |
+
"train_samples": 1200,
|
12 |
+
"train_samples_per_second": 37.877,
|
13 |
+
"train_steps_per_second": 4.735
|
14 |
+
}
|
config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sileod/roberta-base-mnli",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"id2label": {
|
14 |
+
"0": "CONTRADICTION",
|
15 |
+
"1": "ENTAILMENT",
|
16 |
+
"2": "NEUTRAL"
|
17 |
+
},
|
18 |
+
"initializer_range": 0.02,
|
19 |
+
"intermediate_size": 3072,
|
20 |
+
"label2id": {
|
21 |
+
"CONTRADICTION": 0,
|
22 |
+
"ENTAILMENT": 1,
|
23 |
+
"NEUTRAL": 2
|
24 |
+
},
|
25 |
+
"layer_norm_eps": 1e-05,
|
26 |
+
"max_position_embeddings": 514,
|
27 |
+
"model_type": "roberta",
|
28 |
+
"num_attention_heads": 12,
|
29 |
+
"num_hidden_layers": 12,
|
30 |
+
"pad_token_id": 1,
|
31 |
+
"position_embedding_type": "absolute",
|
32 |
+
"problem_type": "single_label_classification",
|
33 |
+
"torch_dtype": "float32",
|
34 |
+
"transformers_version": "4.21.0.dev0",
|
35 |
+
"type_vocab_size": 1,
|
36 |
+
"use_cache": true,
|
37 |
+
"vocab_size": 50265
|
38 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_accuracy": 0.8399999737739563,
|
4 |
+
"eval_loss": 0.8397448658943176,
|
5 |
+
"eval_runtime": 1.7729,
|
6 |
+
"eval_samples": 300,
|
7 |
+
"eval_samples_per_second": 169.214,
|
8 |
+
"eval_steps_per_second": 21.434
|
9 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f28c6566a80bf80b26a2e5cc10639cbdb067568ff4892835ccb87fe6371ac35b
|
3 |
+
size 498668617
|
roberta2-base-mnli-negnli/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
tags:
|
4 |
+
- generated_from_trainer
|
5 |
+
metrics:
|
6 |
+
- accuracy
|
7 |
+
model-index:
|
8 |
+
- name: roberta2-base-mnli-negnli
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# roberta2-base-mnli-negnli
|
16 |
+
|
17 |
+
This model is a fine-tuned version of [sileod/roberta-base-mnli](https://huggingface.co/sileod/roberta-base-mnli) on an unknown dataset.
|
18 |
+
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 0.8397
|
20 |
+
- Accuracy: 0.8400
|
21 |
+
|
22 |
+
## Model description
|
23 |
+
|
24 |
+
More information needed
|
25 |
+
|
26 |
+
## Intended uses & limitations
|
27 |
+
|
28 |
+
More information needed
|
29 |
+
|
30 |
+
## Training and evaluation data
|
31 |
+
|
32 |
+
More information needed
|
33 |
+
|
34 |
+
## Training procedure
|
35 |
+
|
36 |
+
### Training hyperparameters
|
37 |
+
|
38 |
+
The following hyperparameters were used during training:
|
39 |
+
- learning_rate: 2e-05
|
40 |
+
- train_batch_size: 8
|
41 |
+
- eval_batch_size: 8
|
42 |
+
- seed: 42
|
43 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
44 |
+
- lr_scheduler_type: linear
|
45 |
+
- num_epochs: 5.0
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
### Framework versions
|
52 |
+
|
53 |
+
- Transformers 4.21.0.dev0
|
54 |
+
- Pytorch 1.8.0
|
55 |
+
- Datasets 1.18.3
|
56 |
+
- Tokenizers 0.12.1
|
roberta2-base-mnli-negnli/all_results.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_accuracy": 0.8399999737739563,
|
4 |
+
"eval_loss": 0.8397448658943176,
|
5 |
+
"eval_runtime": 1.7729,
|
6 |
+
"eval_samples": 300,
|
7 |
+
"eval_samples_per_second": 169.214,
|
8 |
+
"eval_steps_per_second": 21.434,
|
9 |
+
"train_loss": 0.444431884765625,
|
10 |
+
"train_runtime": 158.4081,
|
11 |
+
"train_samples": 1200,
|
12 |
+
"train_samples_per_second": 37.877,
|
13 |
+
"train_steps_per_second": 4.735
|
14 |
+
}
|
roberta2-base-mnli-negnli/checkpoint-500/config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sileod/roberta-base-mnli",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"id2label": {
|
14 |
+
"0": "CONTRADICTION",
|
15 |
+
"1": "ENTAILMENT",
|
16 |
+
"2": "NEUTRAL"
|
17 |
+
},
|
18 |
+
"initializer_range": 0.02,
|
19 |
+
"intermediate_size": 3072,
|
20 |
+
"label2id": {
|
21 |
+
"CONTRADICTION": 0,
|
22 |
+
"ENTAILMENT": 1,
|
23 |
+
"NEUTRAL": 2
|
24 |
+
},
|
25 |
+
"layer_norm_eps": 1e-05,
|
26 |
+
"max_position_embeddings": 514,
|
27 |
+
"model_type": "roberta",
|
28 |
+
"num_attention_heads": 12,
|
29 |
+
"num_hidden_layers": 12,
|
30 |
+
"pad_token_id": 1,
|
31 |
+
"position_embedding_type": "absolute",
|
32 |
+
"problem_type": "single_label_classification",
|
33 |
+
"torch_dtype": "float32",
|
34 |
+
"transformers_version": "4.21.0.dev0",
|
35 |
+
"type_vocab_size": 1,
|
36 |
+
"use_cache": true,
|
37 |
+
"vocab_size": 50265
|
38 |
+
}
|
roberta2-base-mnli-negnli/checkpoint-500/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
roberta2-base-mnli-negnli/checkpoint-500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e9551da3c73121e547aa394625b20438e8a11b4245def5cae7805c1bc86e83c
|
3 |
+
size 997309649
|
roberta2-base-mnli-negnli/checkpoint-500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17766a4377f17b6610496612f147935767e3bc8738819ade326f26428061adb3
|
3 |
+
size 498668617
|
roberta2-base-mnli-negnli/checkpoint-500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b52f8bf55ed105f0f2d1c981187dc836c4a58af053360add9d9a3266f9fd3b1
|
3 |
+
size 14657
|
roberta2-base-mnli-negnli/checkpoint-500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:519f1a7aac1e26b93c5dc8fd29b1ba14a602229a9976e9a4f11ec0084c0506e1
|
3 |
+
size 623
|
roberta2-base-mnli-negnli/checkpoint-500/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
roberta2-base-mnli-negnli/checkpoint-500/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
roberta2-base-mnli-negnli/checkpoint-500/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<s>",
|
4 |
+
"cls_token": "<s>",
|
5 |
+
"eos_token": "</s>",
|
6 |
+
"errors": "replace",
|
7 |
+
"mask_token": "<mask>",
|
8 |
+
"model_max_length": 512,
|
9 |
+
"name_or_path": "roberta-base",
|
10 |
+
"pad_token": "<pad>",
|
11 |
+
"sep_token": "</s>",
|
12 |
+
"special_tokens_map_file": null,
|
13 |
+
"tokenizer_class": "RobertaTokenizer",
|
14 |
+
"trim_offsets": true,
|
15 |
+
"unk_token": "<unk>"
|
16 |
+
}
|
roberta2-base-mnli-negnli/checkpoint-500/trainer_state.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.3333333333333335,
|
5 |
+
"global_step": 500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 3.33,
|
12 |
+
"learning_rate": 6.666666666666667e-06,
|
13 |
+
"loss": 0.5834,
|
14 |
+
"step": 500
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"max_steps": 750,
|
18 |
+
"num_train_epochs": 5,
|
19 |
+
"total_flos": 263113417728000.0,
|
20 |
+
"trial_name": null,
|
21 |
+
"trial_params": null
|
22 |
+
}
|
roberta2-base-mnli-negnli/checkpoint-500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d8ac10c632bedfc183e887aabf3ba8bb835486176969bfa525240f0f4f51ac7
|
3 |
+
size 3311
|
roberta2-base-mnli-negnli/checkpoint-500/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
roberta2-base-mnli-negnli/config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sileod/roberta-base-mnli",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"id2label": {
|
14 |
+
"0": "CONTRADICTION",
|
15 |
+
"1": "ENTAILMENT",
|
16 |
+
"2": "NEUTRAL"
|
17 |
+
},
|
18 |
+
"initializer_range": 0.02,
|
19 |
+
"intermediate_size": 3072,
|
20 |
+
"label2id": {
|
21 |
+
"CONTRADICTION": 0,
|
22 |
+
"ENTAILMENT": 1,
|
23 |
+
"NEUTRAL": 2
|
24 |
+
},
|
25 |
+
"layer_norm_eps": 1e-05,
|
26 |
+
"max_position_embeddings": 514,
|
27 |
+
"model_type": "roberta",
|
28 |
+
"num_attention_heads": 12,
|
29 |
+
"num_hidden_layers": 12,
|
30 |
+
"pad_token_id": 1,
|
31 |
+
"position_embedding_type": "absolute",
|
32 |
+
"problem_type": "single_label_classification",
|
33 |
+
"torch_dtype": "float32",
|
34 |
+
"transformers_version": "4.21.0.dev0",
|
35 |
+
"type_vocab_size": 1,
|
36 |
+
"use_cache": true,
|
37 |
+
"vocab_size": 50265
|
38 |
+
}
|
roberta2-base-mnli-negnli/eval_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_accuracy": 0.8399999737739563,
|
4 |
+
"eval_loss": 0.8397448658943176,
|
5 |
+
"eval_runtime": 1.7729,
|
6 |
+
"eval_samples": 300,
|
7 |
+
"eval_samples_per_second": 169.214,
|
8 |
+
"eval_steps_per_second": 21.434
|
9 |
+
}
|
roberta2-base-mnli-negnli/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
roberta2-base-mnli-negnli/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f28c6566a80bf80b26a2e5cc10639cbdb067568ff4892835ccb87fe6371ac35b
|
3 |
+
size 498668617
|
roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/1657201850.622683/events.out.tfevents.1657201850.doe.63764.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4eefc6dd2cc4695bd0f63bffbbd0533708dcb84d82c5ca338c92e366c5d64846
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/events.out.tfevents.1657201850.doe.63764.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ee2e90b2d788565553ebd5878c2b5606cb503e2714bbcfe49a35dffcc7e5762
|
3 |
+
size 4215
|
roberta2-base-mnli-negnli/runs/Jul07_23-50-30_doe/events.out.tfevents.1657201943.doe.63764.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee715a065059002451302c27b446ca8dfbc200031d44ea4ed32123de54ab7def
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/1657202061.6067924/events.out.tfevents.1657202061.doe.2632.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b96e4de1cc2f0c22feafe25fbde21e41266a99ee6ea55d71ac849a31c99124a
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/events.out.tfevents.1657202061.doe.2632.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55fb8b1e827be004241cdeeaa1afab3479f43c26f94bfd65ab9692aebcdbd76c
|
3 |
+
size 4215
|
roberta2-base-mnli-negnli/runs/Jul07_23-54-02_doe/events.out.tfevents.1657202129.doe.2632.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d26a7d40ef50f6c4521a21ff0912102037aea093d50638826be6914494d79b
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/1657205852.4321375/events.out.tfevents.1657205852.doe.4547.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdea2c00555620bc8d641c32aaea93537577589515f8368c1e48d024dc9101b8
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/events.out.tfevents.1657205852.doe.4547.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:129c745c8128bc9abc69b7d043df76197bdf3f5230e3ef1a61dd519a255acaf4
|
3 |
+
size 4445
|
roberta2-base-mnli-negnli/runs/Jul08_00-57-12_doe/events.out.tfevents.1657206008.doe.4547.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41814716da87e078ad112b437f2d16a1e4f205ed0123fc6710265a37ce042d37
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/1657206517.7561595/events.out.tfevents.1657206517.doe.16385.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f986d4bb16a3aeddb300cf944f37db69f563e84c7e08b83238d71c7ae6a31271
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/events.out.tfevents.1657206517.doe.16385.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77ebd2278c753d388b8c5ffcdaf0a98fbff43cd241ef963546610c42ddd22e45
|
3 |
+
size 4215
|
roberta2-base-mnli-negnli/runs/Jul08_01-08-17_doe/events.out.tfevents.1657206612.doe.16385.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b1a60159001ce313ad8ecc1a27c52e24a82f18fb32cad5e979b12d3d1155600
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/1657206761.9519951/events.out.tfevents.1657206761.doe.20937.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f73b0655c7c22c157f08feb5fd5b079430c6974f976c602e863d2acc35961f01
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/events.out.tfevents.1657206761.doe.20937.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9750c7e2b2837d2176aba17d8c5f47cf47dc06e73b72a5162d9e6e0208c3c3e7
|
3 |
+
size 4249
|
roberta2-base-mnli-negnli/runs/Jul08_01-12-22_doe/events.out.tfevents.1657206857.doe.20937.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c970e9141055c4f2a314c219a9a8403acd07c5e6d2bc723518a3f1bdfffa343
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/1657206941.3139427/events.out.tfevents.1657206941.doe.25921.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e4bc0171d172a263aecf4a88587c394f2e4b13abcd4724fcf2cae98bd8dc1b7
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/events.out.tfevents.1657206941.doe.25921.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2343c15b57f947fe9ddbd3ea1db5f66a111013901d5cacadeddb72c3b82ced6
|
3 |
+
size 4406
|
roberta2-base-mnli-negnli/runs/Jul08_01-15-21_doe/events.out.tfevents.1657207104.doe.25921.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98b4f4109f27eb713949884555036a225f29554c68fd6690d580ca4f7bc19fce
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/1657207371.2546418/events.out.tfevents.1657207371.doe.33741.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1285fb859702ce1e7ad8ab9c725d482ebd2ac1c769c61fbfb3aa54a17efe84b9
|
3 |
+
size 5365
|
roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/events.out.tfevents.1657207371.doe.33741.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c06ceed443ee396d79106660494c31cdfe84bd675323f59efe192bd3e2915ee5
|
3 |
+
size 4445
|
roberta2-base-mnli-negnli/runs/Jul08_01-22-31_doe/events.out.tfevents.1657207536.doe.33741.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:328a9c6f9adab7a18c86e781f03655c14449695e2135aeb02b3af07aaca7418e
|
3 |
+
size 363
|
roberta2-base-mnli-negnli/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
roberta2-base-mnli-negnli/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
roberta2-base-mnli-negnli/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<s>",
|
4 |
+
"cls_token": "<s>",
|
5 |
+
"eos_token": "</s>",
|
6 |
+
"errors": "replace",
|
7 |
+
"mask_token": "<mask>",
|
8 |
+
"model_max_length": 512,
|
9 |
+
"name_or_path": "roberta-base",
|
10 |
+
"pad_token": "<pad>",
|
11 |
+
"sep_token": "</s>",
|
12 |
+
"special_tokens_map_file": null,
|
13 |
+
"tokenizer_class": "RobertaTokenizer",
|
14 |
+
"trim_offsets": true,
|
15 |
+
"unk_token": "<unk>"
|
16 |
+
}
|
roberta2-base-mnli-negnli/train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"train_loss": 0.444431884765625,
|
4 |
+
"train_runtime": 158.4081,
|
5 |
+
"train_samples": 1200,
|
6 |
+
"train_samples_per_second": 37.877,
|
7 |
+
"train_steps_per_second": 4.735
|
8 |
+
}
|
roberta2-base-mnli-negnli/trainer_state.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"global_step": 750,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 3.33,
|
12 |
+
"learning_rate": 6.666666666666667e-06,
|
13 |
+
"loss": 0.5834,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 5.0,
|
18 |
+
"step": 750,
|
19 |
+
"total_flos": 394670126592000.0,
|
20 |
+
"train_loss": 0.444431884765625,
|
21 |
+
"train_runtime": 158.4081,
|
22 |
+
"train_samples_per_second": 37.877,
|
23 |
+
"train_steps_per_second": 4.735
|
24 |
+
}
|
25 |
+
],
|
26 |
+
"max_steps": 750,
|
27 |
+
"num_train_epochs": 5,
|
28 |
+
"total_flos": 394670126592000.0,
|
29 |
+
"trial_name": null,
|
30 |
+
"trial_params": null
|
31 |
+
}
|