{
  "amp": 1,
  "architectures": [
    "FlaubertWithLMHeadModel"
  ],
  "asm": false,
  "attention_dropout": 0.1,
  "bos_index": 0,
  "bos_token_id": 0,
  "bptt": 512,
  "causal": false,
  "clip_grad_norm": 5,
  "dropout": 0.1,
  "emb_dim": 768,
  "embed_init_std": 0.02209708691207961,
  "encoder_only": true,
  "end_n_top": 5,
  "eos_index": 1,
  "fp16": true,
  "gelu_activation": true,
  "group_by_size": true,
  "id2lang": {
    "0": "fr"
  },
  "init_std": 0.02,
  "is_encoder": true,
  "lang2id": {
    "fr": 0
  },
  "lang_id": 0,
  "langs": [
    "fr"
  ],
  "layer_norm_eps": 1e-12,
  "layerdrop": 0.0,
  "lg_sampling_factor": -1,
  "lgs": "fr",
  "mask_index": 5,
  "mask_token_id": 0,
  "max_batch_size": 0,
  "max_position_embeddings": 512,
  "max_vocab": -1,
  "mlm_steps": [
    [
      "fr",
      null
    ]
  ],
  "model_type": "flaubert",
  "n_heads": 12,
  "n_langs": 1,
  "n_layers": 12,
  "pad_index": 2,
  "pad_token_id": 2,
  "pre_norm": false,
  "sample_alpha": 0,
  "share_inout_emb": true,
  "sinusoidal_embeddings": false,
  "start_n_top": 5,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "first",
  "summary_use_proj": true,
  "tokens_per_batch": -1,
  "unk_index": 3,
  "use_lang_emb": true,
  "vocab_size": 68729,
  "word_blank": 0,
  "word_dropout": 0,
  "word_keep": 0.1,
  "word_mask": 0.8,
  "word_mask_keep_rand": "0.8,0.1,0.1",
  "word_pred": 0.15,
  "word_rand": 0.1,
  "word_shuffle": 0
}