export NCCL_BLOCKING_WAIT=1  # make NCCL collectives blocking so timeouts/errors raise instead of hanging
export NCCL_IB_DISABLE=1     # disable the InfiniBand transport (fall back to TCP sockets)
export NCCL_DEBUG=INFO
export NCCL_P2P_DISABLE=1    # disable direct GPU-to-GPU (P2P) transfers over NVLink/PCIe
# See https://github.com/NVIDIA/nccl/issues/631
#export TORCH_DISTRIBUTED_DEBUG=DETAIL
export TORCH_DISTRIBUTED_DEBUG=OFF
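
# Optional sanity check (not part of the run): confirm the NCCL settings above
# are in the environment the launcher will inherit:
#   python -c 'import os; print({k: v for k, v in os.environ.items() if k.startswith("NCCL")})'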
# Launch on GPUs 4-7 of this node; --include takes <host>:<comma-separated GPU indices>.
deepspeed \
--include=localhost:4,5,6,7 \
--master_port 8921 \
train.py \
\
--model_name_or_path lmsys/vicuna-13b-v1.5 \
--data_file ./data/finetune-pairs.json \
--debug_single_layer False \
--dryrun False \
--use_lora False \
--ctx_length 2048 \
--datamap_nprocs 10 \
--use_flash_att2 True \
--load_8bit False \
--num_train_epochs 3 \
\
--output_dir ./output \
--save_strategy "steps" \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 1 \
--report_to "tensorboard" \
\
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 12 \
--max_grad_norm 1.0 \
--learning_rate 2e-5 \
--warmup_ratio 0.03 \
--fp16 False \
--bf16 True \
--deepspeed $(python ds_config.py \
--en_param_offload False \
--en_act_ackpt False \
--en_sparse_attn False \
)
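
# ds_config.py is not shown here; the launcher only assumes it prints the path
# of a generated DeepSpeed JSON config to stdout, which $(...) splices into
# --deepspeed. A minimal hypothetical sketch of that contract (the flag names
# match the call above, but the config contents, e.g. the ZeRO stage, are
# assumptions, not the project's actual settings):
#
#   import argparse, json, tempfile
#
#   def str2bool(v):
#       return str(v).lower() in ("true", "1", "yes")
#
#   p = argparse.ArgumentParser()
#   p.add_argument("--en_param_offload", type=str2bool, default=False)
#   p.add_argument("--en_act_ackpt", type=str2bool, default=False)
#   p.add_argument("--en_sparse_attn", type=str2bool, default=False)
#   args = p.parse_args()
#
#   cfg = {
#       "train_micro_batch_size_per_gpu": "auto",
#       "gradient_accumulation_steps": "auto",
#       "bf16": {"enabled": "auto"},
#       "zero_optimization": {"stage": 3},
#   }
#   if args.en_param_offload:
#       cfg["zero_optimization"]["offload_param"] = {"device": "cpu"}
#   if args.en_act_ackpt:
#       cfg["activation_checkpointing"] = {"partition_activations": True}
#   # (en_sparse_attn is accepted but left unused in this sketch)
#
#   with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
#       json.dump(cfg, f)
#   print(f.name)  # the path consumed by --deepspeed above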