---
license: llama3
language:
- en
library_name: transformers
pipeline_tag: text-generation
datasets:
- cognitivecomputations/WizardLM_alpaca_evol_instruct_70k_unfiltered
tags:
- finetune
- llama
---
# WizardHermes-2-Pro-Llama-3-8b
Fine-tuned from Meta-Llama-3-8b for 0.55 epochs
Merged with Hermes-2-Pro-Llama-3-8b
Eval (temperature 0.0, 0-shot; questions from https://inflection.ai/assets/MMLU-Examples.pdf):
Hermes 2 Pro LoRA (merged) (checkpoint 1): 7/12
Hermes 2 Pro LoRA (merged) (checkpoint 2): 9/12
Hermes 2 Pro Llama 3 8b: 8/12
TruthfulQA (0-shot):
Hermes 2 Pro LoRA (merged) (checkpoint 2): 9/15
Hermes 2 Pro Llama 3 8b: 7/15
Trained using MonsterAPI
LoRA training parameters:
```json
{
  "deployment_name": "wizardlm-llama-3-8b",
  "pretrainedmodel_config": {
    "model_path": "meta-llama/Meta-Llama-3-8B",
    "other_model_info": null,
    "resume_checkpoint_path": "",
    "use_lora": true,
    "lora_r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "lora_bias": "none",
    "use_quantization": false,
    "use_gradient_checkpointing": false,
    "parallelization": "nmp"
  },
  "data_config": {
    "data_path": "cognitivecomputations/WizardLM_alpaca_evol_instruct_70k_unfiltered",
    "data_subset": "default",
    "data_source_type": "hub_link",
    "cutoff_len": 4096,
    "data_split_config": {
      "train": 0.9,
      "validation": 0.1
    },
    "prevalidated": true,
    "concat_config": {
      "0": {
        "text": "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n\n###Instruction:",
        "column": false
      },
      "1": {
        "text": "instruction",
        "column": true
      },
      "2": {
        "text": "\n\n\n###Response:",
        "column": false
      },
      "3": {
        "text": "output",
        "column": true
      }
    }
  },
  "training_config": {
    "early_stopping_patience": 5,
    "num_train_epochs": 5,
    "gradient_accumulation_steps": 1,
    "warmup_steps": 300,
    "learning_rate": 0.0005,
    "lr_scheduler_type": "reduce_lr_on_plateau",
    "group_by_length": false,
    "use_hugging_face": false
  },
  "logging_config": {
    "use_wandb": false,
    "wandb_username": "",
    "wandb_login_key": "",
    "wandb_project": "",
    "wandb_run_name": ""
  },
  "accessorytasks_config": {
    "run_eval_report": false,
    "run_quantize_merge": false
  }
}
```