Adokato committed (verified)
Commit 97b9624 · 1 parent: 5c98ed2

End of training

README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [eugenesiow/bart-paraphrase](https://huggingface.co/eugenesiow/bart-paraphrase) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.7935
+- Loss: 3.3337
 
 ## Model description
 
@@ -35,11 +35,11 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 16
-- eval_batch_size: 16
+- train_batch_size: 32
+- eval_batch_size: 32
 - seed: 42
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 32
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
@@ -47,11 +47,11 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 4.1212        | 1.0   | 187  | 3.4982          |
-| 2.8867        | 2.0   | 374  | 2.3374          |
-| 2.2427        | 3.0   | 561  | 1.7548          |
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 5.2622        | 0.9968 | 154  | 4.8920          |
+| 4.4409        | 2.0    | 309  | 3.8232          |
+| 3.8001        | 2.9903 | 462  | 3.2888          |
 
 
 ### Framework versions
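The updated hyperparameter list maps one-to-one onto a `transformers` `Seq2SeqTrainingArguments` object. A minimal sketch, assuming the run used the standard `Trainer` API; the output directory, epoch count, and eval strategy are assumptions not stated in the diff:

```python
from transformers import Seq2SeqTrainingArguments

# Sketch only: values copied from the updated card; commented fields are assumptions.
args = Seq2SeqTrainingArguments(
    output_dir="bart-paraphrase-finetuned",  # hypothetical name, not in the card
    learning_rate=2e-05,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=2,  # 32 * 2 = 64 total train batch size
    seed=42,
    lr_scheduler_type="linear",
    warmup_steps=500,
    num_train_epochs=3,             # implied by the three rows in the results table
    evaluation_strategy="epoch",    # assumption; the table reports one eval per epoch
)
```

Adam with betas=(0.9, 0.999) and epsilon=1e-08 matches the `Trainer` defaults (`adam_beta1`, `adam_beta2`, `adam_epsilon`), so it needs no explicit arguments.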
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c9486e8d903ee43bc2221289cfff29d979bd726dd42c0a4cc2ac767446a109
+oid sha256:5752e548c1e4f7ef8deba4be3cd81e7069c939ee0c2bf600a386c98da0120973
 size 1625426996
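Only the Git LFS pointer changes here: the `oid` is the SHA-256 digest of the new weights blob, and the identical `size` reflects an unchanged architecture. A small sketch for verifying a local download against the pointer (the file path is a placeholder):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so the ~1.6 GB checkpoint never sits in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Placeholder path; compare against the oid in the new pointer above.
print(sha256_of("model.safetensors") ==
      "5752e548c1e4f7ef8deba4be3cd81e7069c939ee0c2bf600a386c98da0120973")
```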
special_tokens_map.json CHANGED
@@ -1,7 +1,25 @@
1
  {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
@@ -9,7 +27,25 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
 
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
  }
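Each expanded entry is the serialized form of a `tokenizers.AddedToken`, with the previously implicit handling flags written out. Roughly equivalent in code; since these are the default flag values, the tokens behave the same as before:

```python
from tokenizers import AddedToken

# The new "bos_token" entry, expressed as the object it deserializes to.
bos = AddedToken("<s>", lstrip=False, normalized=True,
                 rstrip=False, single_word=False)
```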
tokenizer_config.json CHANGED
@@ -48,10 +48,9 @@
   "eos_token": "</s>",
   "errors": "replace",
   "mask_token": "<mask>",
-  "model_max_length": 1000000000000000019884624838656,
+  "model_max_length": 1024,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "tokenizer_class": "BartTokenizer",
-  "trim_offsets": true,
   "unk_token": "<unk>"
 }
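The old value is the library's `VERY_LARGE_INTEGER` sentinel (`int(1e30)`), which `transformers` writes when no length limit is known; the new value pins the limit to BART's actual 1024-token context. A quick sketch of the practical effect (the repo id is a placeholder for this model):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Adokato/<this-repo>")  # placeholder id
assert tok.model_max_length == 1024

# With the limit set, truncation now caps inputs at the model's real window.
ids = tok("paraphrase me " * 1000, truncation=True)["input_ids"]
assert len(ids) <= 1024
```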
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ac0b7cf3cf74e5bdfb57541f92de3816a8163161438428d48f5437117cdf4fd
+oid sha256:4f4a9d90b024e60cf38f5d53c1d4213acadc99707cb5b7712469951525ad4fb0
 size 4920
vocab.json CHANGED
The diff for this file is too large to render. See raw diff