File size: 1,096 Bytes
9551965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
    "config": {
      "adapter_residual_before_ln": false,
      "cross_adapter": false,
      "factorized_phm_W": true,
      "factorized_phm_rule": false,
      "hypercomplex_nonlinearity": "glorot-uniform",
      "init_weights": "bert",
      "inv_adapter": null,
      "inv_adapter_reduction_factor": null,
      "is_parallel": false,
      "learn_phm": true,
      "leave_out": [],
      "ln_after": false,
      "ln_before": false,
      "mh_adapter": true,
      "non_linearity": "swish",
      "original_ln_after": true,
      "original_ln_before": false,
      "output_adapter": true,
      "phm_bias": true,
      "phm_c_init": "normal",
      "phm_dim": 100,
      "phm_init_range": 0.0001,
      "phm_layer": false,
      "phm_rank": 1,
      "reduction_factor": 3,
      "residual_before_ln": true,
      "scaling": 1.0,
      "shared_W_phm": false,
      "shared_phm_rule": true,
      "use_gating": false
    },
    "hidden_size": 1600,
    "model_class": "GPT2LMHeadModel",
    "model_name": "gpt2-xl",
    "model_type": "gpt2",
    "name": "astraia",
    "version": "3.2.0"
  }