---
# Training-run configuration: one flat mapping of scalar settings.
# NOTE(review): the key names suggest a DPO preference-tuning run read by a
# Hugging Face / TRL-style trainer -- confirm against the loader.
# Keys are grouped by concern for readability only; order has no meaning.

# --- Base model and numeric precision ---
model_name_or_path: phunguyen01/II-Tulu-8B-SFT
model_revision: main
trust_remote_code: false
attn_implementation: flash_attention_2
torch_dtype: bfloat16
bf16: true
chat_template: tokenizer_default

# --- Datasets ---
# Train and eval both point at the same dataset; only the split differs.
train_dataset_path: allenai/llama-3.1-tulu-3-8b-preference-mixture
train_split: train
eval_dataset_path: allenai/llama-3.1-tulu-3-8b-preference-mixture
eval_split: test

# --- Sequence limits ---
max_length: 2048
max_prompt_length: 1024

# --- Optimisation ---
# NOTE(review): presumably 4 x 8 = 32 sequences per device per optimizer step
# -- confirm how the consumer combines these two values.
per_device_train_batch_size: 4
gradient_accumulation_steps: 8
# Newly added setting.
gradient_checkpointing: true
learning_rate: 5.0e-07
lr_scheduler_type: linear
optim: rmsprop
beta: 0.1
num_train_epochs: 1
seed: 42

# --- Evaluation ---
# NOTE(review): do_eval is false, so the eval_* keys and evals_per_epoch are
# presumably inert for this run -- verify, and check that the dataset really
# exposes a `test` split before turning evaluation on.
do_eval: false
evals_per_epoch: 1
per_device_eval_batch_size: 4

# --- Checkpoints and Hub upload ---
output_dir: checkpoints/0b276918-456f-43bc-93cd-e36fec5d8709
save_strategy: epoch
push_to_hub: true
hub_model_id: phunguyen01/II-Tulu-8B-DPO-v2

# --- Logging / experiment tracking ---
logging_steps: 10
wandb_project: llm-training-platform
wandb_run_name: II-Tulu-8B-DPO-v2
wandb_run_id: 158a00f5-9ed9-4d55-8ff6-5a626d9d306e