mgladden committed on
Commit
5b8a9fe
·
1 Parent(s): 0406d97

Upload model

Browse files
Files changed (3) hide show
  1. README.md +10 -8
  2. config.json +2 -9
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,18 +1,20 @@
1
  ---
2
- license: apache-2.0
3
  tags:
4
- - management
5
- - text generation
6
  model-index:
7
  - name: ManaGPT-1010
8
  results: []
9
- language:
10
- - en
11
  ---
12
 
 
 
 
13
  # ManaGPT-1010
14
- <img style="float:right; margin-right:30px" src="https://huggingface.co/NeuraXenetica/ManaGPT-1010/resolve/main/ManaGPT_logo_01.png" width="150" height="150">
15
- This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on a custom dataset.
 
 
16
 
17
  ## Model description
18
 
@@ -43,4 +45,4 @@ The following hyperparameters were used during training:
43
  - Transformers 4.27.1
44
  - TensorFlow 2.11.0
45
  - Datasets 2.10.1
46
- - Tokenizers 0.13.2
 
1
  ---
2
+ license: mit
3
  tags:
4
+ - generated_from_keras_callback
 
5
  model-index:
6
  - name: ManaGPT-1010
7
  results: []
 
 
8
  ---
9
 
10
+ <!-- This model card has been generated automatically according to the information Keras had access to. You should
11
+ probably proofread and complete it, then remove this comment. -->
12
+
13
  # ManaGPT-1010
14
+
15
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
16
+ It achieves the following results on the evaluation set:
17
+
18
 
19
  ## Model description
20
 
 
45
  - Transformers 4.27.1
46
  - TensorFlow 2.11.0
47
  - Datasets 2.10.1
48
+ - Tokenizers 0.13.2
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "_name_or_path": "distilgpt2",
3
- "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2LMHeadModel"
@@ -10,13 +9,7 @@
10
  "do_sample": true,
11
  "embd_pdrop": 0.1,
12
  "eos_token_id": 50256,
13
- "id2label": {
14
- "0": "LABEL_0"
15
- },
16
  "initializer_range": 0.02,
17
- "label2id": {
18
- "LABEL_0": 0
19
- },
20
  "layer_norm_epsilon": 1e-05,
21
  "max_length": 50,
22
  "model_type": "gpt2",
@@ -24,7 +17,7 @@
24
  "n_embd": 768,
25
  "n_head": 12,
26
  "n_inner": null,
27
- "n_layer": 6,
28
  "n_positions": 1024,
29
  "pad_token_id": 50256,
30
  "reorder_and_upcast_attn": false,
 
1
  {
2
+ "_name_or_path": "gpt2",
 
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
9
  "do_sample": true,
10
  "embd_pdrop": 0.1,
11
  "eos_token_id": 50256,
 
 
 
12
  "initializer_range": 0.02,
 
 
 
13
  "layer_norm_epsilon": 1e-05,
14
  "max_length": 50,
15
  "model_type": "gpt2",
 
17
  "n_embd": 768,
18
  "n_head": 12,
19
  "n_inner": null,
20
+ "n_layer": 12,
21
  "n_positions": 1024,
22
  "pad_token_id": 50256,
23
  "reorder_and_upcast_attn": false,
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:928fcd99565456294c904d0ced5e0c9e81871df1a9511bbfa9a0e2da47fd62a5
3
- size 327745472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb46ed853ac5704b211c087abcd299d904b9f69b2b959d84e6a00e17a546b931
3
+ size 497935440