File size: 1,171 Bytes
98473ba
d06efa1
98473ba
d06efa1
98473ba
d06efa1
98473ba
d06efa1
 
 
 
98473ba
d06efa1
98473ba
 
d06efa1
 
 
98473ba
d06efa1
 
98473ba
1a6990e
d06efa1
98473ba
d06efa1
98473ba
c3303b5
98473ba
 
d06efa1
98473ba
 
 
 
 
 
 
 
 
94c56ea
d06efa1
98473ba
d06efa1
98473ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
{
  "_name_or_path": "hf-internal-testing/tiny-random-gpt2",
  "activation_function": "gelu_new",
  "attention_probs_dropout_prob": 0.1,
  "attn_pdrop": 0.1,
  "bos_token_id": 98,
  "embd_pdrop": 0.1,
  "eos_token_id": 98,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "initializer_range": 0.02,
  "intermediate_size": 37,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 32,
  "n_head": 4,
  "n_inner": null,
  "n_layer": 5,
  "n_positions": 512,
  "neuron": {
    "auto_cast_type": "fp32",
    "batch_size": 1,
    "compiler_type": "neuronx-cc",
    "compiler_version": "2.11.0.31+b1624c454",
    "num_cores": 2,
    "sequence_length": 512,
    "task": "text-generation"
  },
  "pad_token_id": 98,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.34.0",
  "type_vocab_size": 16,
  "use_cache": true,
  "vocab_size": 1000
}