{
  "attn_dropout_p": 0.0,
  "aux_loss_weight": 0.0,
  "clusters_path": null,
  "d_ff": 512,
  "d_model": 384,
  "distr_output": {
    "_target_": "uni2ts.distribution.mixture.MixtureOutput",
    "components": [
      {
        "_target_": "uni2ts.distribution.student_t.StudentTOutput"
      },
      {
        "_target_": "uni2ts.distribution.normal.NormalFixedScaleOutput",
        "scale": 0.001
      },
      {
        "_target_": "uni2ts.distribution.negative_binomial.NegativeBinomialOutput"
      },
      {
        "_target_": "uni2ts.distribution.log_normal.LogNormalOutput"
      }
    ]
  },
  "dropout_p": 0.0,
  "max_seq_len": 512,
  "num_experts": 32,
  "num_experts_per_token": 2,
  "num_layers": 6,
  "patch_sizes": [
    16
  ],
  "scaling": true,
  "use_clusters": true
}