{
  "_name_or_path": "hf-internal-testing/tiny-random-gpt2",
  "activation_function": "gelu_new",
  "attention_probs_dropout_prob": 0.1,
  "attn_pdrop": 0.1,
  "bos_token_id": 98,
  "embd_pdrop": 0.1,
  "eos_token_id": 98,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "initializer_range": 0.02,
  "intermediate_size": 37,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 32,
  "n_head": 4,
  "n_inner": null,
  "n_layer": 5,
  "n_positions": 512,
  "neuron": {
    "auto_cast_type": "fp32",
    "batch_size": 1,
    "compiler_type": "neuronx-cc",
    "compiler_version": "2.11.0.31+b1624c454",
    "num_cores": 2,
    "sequence_length": 512,
    "task": "text-generation"
  },
  "pad_token_id": 98,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.34.0",
  "type_vocab_size": 16,
  "use_cache": true,
  "vocab_size": 1000
}