{ "model_type": "gpt2", "architectures": ["GPTForCausalLM"], "vocab_size": 50257, "vector_dim": 768, "context_length": 1024, "n_heads": 24, "n_layers": 20, "dropout_rate": 0.2, "qkv_bias": false, "activation_function": "geglu" }