{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "ISTA-DASLab/Meta-Llama-3-8B-Instruct",
  "bias": "none",
  "d": 0.01,
  "density_pattern": {},
  "fan_in_fan_out": false,
  "grad_4bit_accum": false,
  "grad_acc_mode": "mean_squared",
  "impl": "auto",
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "mask_load_path": "/mnt/data/PanzaMail/scripts/../checkpoints/masks/panza_simon-Meta-Llama-3-8B-Instruct-bf16-bs8-rosa-lr1e-05-5ep-seed41",
  "mask_save_path": null,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "ROSA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "rosa_dtype": "bf16",
  "schedule": "default",
  "spa_num_grads": 1,
  "spa_store_transpose": true,
  "target_modules": [
    "k_proj",
    "down_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "o_proj",
    "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "terminate_after_mask_generation": null,
  "use_rslora": false
}
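
A minimal sketch of inspecting this config with only the Python standard library. It assumes the JSON above is saved under the conventional PEFT filename adapter_config.json; the path and the comments interpreting the RoSA-specific fields ("r" as the low-rank dimension, "d" as the sparse-adapter density) are assumptions based on the field names, not a documented API of any particular library.

    import json

    # Hypothetical location of the file shown above.
    with open("adapter_config.json") as f:
        cfg = json.load(f)

    print(cfg["peft_type"])   # "ROSA"
    print(cfg["r"])           # 8   -- low-rank adapter dimension (assumed meaning)
    print(cfg["d"])           # 0.01 -- sparse-adapter density (assumed meaning)

    # Projection layers the adapter is attached to.
    print(sorted(cfg["target_modules"]))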