|
--- |
|
license: llama3 |
|
language: |
|
- en |
|
library_name: transformers |
|
pipeline_tag: text-generation |
|
datasets: |
|
- cognitivecomputations/WizardLM_alpaca_evol_instruct_70k_unfiltered |
|
tags: |
|
- finetune |
|
- llama |
|
--- |
|
# WizardHermes-2-Pro-Llama-3-8b |
|
Fine-tuned from Meta-Llama-3-8B, 0.55 epochs
|
|
|
Merged with Hermes-2-Pro-Llama-3-8b |
|
|
|
Evaluation (temperature 0.0, 0-shot) on MMLU example questions ([https://inflection.ai/assets/MMLU-Examples.pdf](https://inflection.ai/assets/MMLU-Examples.pdf)):
|
``` |
|
Hermes 2 Pro LoRA (merged) (checkpoint 1): 7/12 |
|
Hermes 2 Pro LoRA (merged) (checkpoint 2): 9/12 |
|
Hermes 2 Pro Llama 3 8b: 8/12 |
|
``` |
|
TruthfulQA (0-shot): |
|
``` |
|
Hermes 2 Pro LoRA (merged) (checkpoint 2): 9/15 |
|
Hermes 2 Pro Llama 3 8b: 7/15 |
|
``` |
|
|
|
**Trained using MonsterAPI** |
|
|
|
LoRA training parameters: |
|
``` |
|
{ |
|
"deployment_name": "wizardlm-llama-3-8b", |
|
"pretrainedmodel_config": { |
|
"model_path": "meta-llama/Meta-Llama-3-8B", |
|
"other_model_info": null, |
|
"resume_checkpoint_path": "", |
|
"use_lora": true, |
|
"lora_r": 8, |
|
"lora_alpha": 16, |
|
"lora_dropout": 0, |
|
"lora_bias": "none", |
|
"use_quantization": false, |
|
"use_gradient_checkpointing": false, |
|
"parallelization": "nmp" |
|
}, |
|
"data_config": { |
|
"data_path": "cognitivecomputations/WizardLM_alpaca_evol_instruct_70k_unfiltered", |
|
"data_subset": "default", |
|
"data_source_type": "hub_link", |
|
"cutoff_len": 4096, |
|
"data_split_config": { |
|
"train": 0.9, |
|
"validation": 0.1 |
|
}, |
|
"prevalidated": true, |
|
"concat_config": { |
|
"0": { |
|
"text": "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n\n###Instruction:", |
|
"column": false |
|
}, |
|
"1": { |
|
"text": "instruction", |
|
"column": true |
|
}, |
|
"2": { |
|
"text": "\n\n\n###Response:", |
|
"column": false |
|
}, |
|
"3": { |
|
"text": "output", |
|
"column": true |
|
} |
|
} |
|
}, |
|
"training_config": { |
|
"early_stopping_patience": 5, |
|
"num_train_epochs": 5, |
|
"gradient_accumulation_steps": 1, |
|
"warmup_steps": 300, |
|
"learning_rate": 0.0005, |
|
"lr_scheduler_type": "reduce_lr_on_plateau", |
|
"group_by_length": false, |
|
"use_hugging_face": false |
|
}, |
|
"logging_config": { |
|
"use_wandb": false, |
|
"wandb_username": "", |
|
"wandb_login_key": "", |
|
"wandb_project": "", |
|
"wandb_run_name": "" |
|
}, |
|
"accessorytasks_config": { |
|
"run_eval_report": false, |
|
"run_quantize_merge": false |
|
} |
|
} |
|
``` |