|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.model |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/added_tokens.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/special_tokens_map.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer_config.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.image_processing_base - loading configuration file preprocessor_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/preprocessor_config.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.image_processing_base - loading configuration file preprocessor_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/preprocessor_config.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.image_processing_base - Image processor LlavaNextImageProcessor {
  "aspect_ratio_setting": "anyres",
  "crop_size": {
    "height": 336,
    "width": 336
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_mean": [0.48145466, 0.4578275, 0.40821073],
  "image_processor_type": "LlavaNextImageProcessor",
  "image_std": [0.26862954, 0.26130258, 0.27577711],
  "processor_class": "LlavaNextProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 336
  }
}
|
|
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.model from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.model |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/added_tokens.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/special_tokens_map.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/tokenizer_config.json |
|
|
|
07/03/2024 15:04:16 - INFO - transformers.processing_utils - Processor LlavaNextProcessor:
- image_processor: LlavaNextImageProcessor {
  "aspect_ratio_setting": "anyres",
  "crop_size": {
    "height": 336,
    "width": 336
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_mean": [0.48145466, 0.4578275, 0.40821073],
  "image_processor_type": "LlavaNextImageProcessor",
  "image_std": [0.26862954, 0.26130258, 0.27577711],
  "processor_class": "LlavaNextProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 336
  }
}

- tokenizer: LlamaTokenizerFast(name_or_path='llava-hf/llava-v1.6-vicuna-7b-hf', vocab_size=32000, model_max_length=4096, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<unk>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={
  0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  32000: AddedToken("<image>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

{
  "chat_template": null,
  "processor_class": "LlavaNextProcessor"
}
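
For reference, a minimal sketch of how a processor like the one dumped above is typically obtained; the model id comes from the cache paths in the log, and everything else is standard transformers usage rather than something the log itself runs:

from transformers import LlavaNextProcessor

model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
# Bundles the LlavaNextImageProcessor and LlamaTokenizerFast shown above.
processor = LlavaNextProcessor.from_pretrained(model_id)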
|
|
|
|
|
07/03/2024 15:04:16 - INFO - llamafactory.data.loader - Loading dataset tminh/test-llava4... |
|
|
|
07/03/2024 15:04:24 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json |
|
|
|
07/03/2024 15:04:24 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
  "_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_token_index": 32000,
  "model_type": "llava_next",
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.42.3",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
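
A quick sanity check of the vision geometry in this config; the arithmetic below is derived from the values above, not printed by the log, and the tiling description is the usual LLaVA-NeXT "anyres" behaviour rather than anything this run logs:

# Derived arithmetic, not part of the log output.
image_size, patch_size = 336, 14
tokens_per_tile = (image_size // patch_size) ** 2   # 24 * 24 = 576 visual tokens per 336x336 tile
# Every "image_grid_pinpoints" entry is a multiple of 336; e.g. a 672x672 input is cut into a
# 2x2 grid of tiles plus one downscaled overview tile, so up to 5 * 576 patch features reach
# the projector before the image-newline tokens are inserted.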
|
|
|
|
|
07/03/2024 15:04:25 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json |
|
|
|
07/03/2024 15:04:25 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
  "_name_or_path": "llava-hf/llava-v1.6-vicuna-7b-hf",
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_token_index": 32000,
  "model_type": "llava_next",
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.42.3",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
|
|
|
|
|
07/03/2024 15:04:25 - WARNING - llamafactory.model.model_utils.unsloth - Unsloth does not support model type llava_next. |
|
|
|
07/03/2024 15:04:25 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/model.safetensors.index.json |
|
|
|
07/03/2024 15:05:57 - INFO - transformers.modeling_utils - Instantiating LlavaNextForConditionalGeneration model under default dtype torch.bfloat16. |
|
|
|
07/03/2024 15:05:57 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {} |
|
|
|
|
|
07/03/2024 15:05:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0
}
|
|
|
|
|
07/03/2024 15:08:02 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing LlavaNextForConditionalGeneration. |
|
|
|
|
|
07/03/2024 15:08:02 - INFO - transformers.modeling_utils - All the weights of LlavaNextForConditionalGeneration were initialized from the model checkpoint at llava-hf/llava-v1.6-vicuna-7b-hf. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlavaNextForConditionalGeneration for predictions without further training. |
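
A hedged sketch of the load step the lines above describe; the dtype follows the "default dtype torch.bfloat16" message and "sdpa" matches the SDPA attention noted a few lines below, while the exact call is an assumption about what the training framework does, not a quote from it:

import torch
from transformers import LlavaNextForConditionalGeneration

model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.bfloat16,      # matches "Instantiating ... under default dtype torch.bfloat16"
    attn_implementation="sdpa",      # matches "Using torch SDPA for faster training and inference"
)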
|
|
|
07/03/2024 15:08:02 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/generation_config.json |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0
}
|
|
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. |
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. |
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. |
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA |
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,q_proj,up_proj,linear_2,o_proj,down_proj,k_proj,gate_proj,linear_1 |
|
|
|
07/03/2024 15:08:02 - INFO - llamafactory.model.loader - trainable params: 5023744 || all params: 7068454912 || trainable%: 0.0711 |
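
The trainable-parameter count can be reproduced by hand (this is an inference from the numbers, not something the log states): with LoRA rank r on the nine listed module names across the 32 LLaMA decoder layers plus the two projector linears, the adapter adds r * (32 * (4 * 8192 + 3 * 15104) + 5120 + 8192) parameters, and r = 2 gives exactly 5,023,744, i.e. 5,023,744 / 7,068,454,912 ~= 0.0711 %. A rough PEFT equivalent of that setup, with the rank and alpha treated as assumptions:

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=2,                        # assumption: rank 2 reproduces the reported 5,023,744 trainable params
    lora_alpha=4,               # illustrative value, not read from the log
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",
                    "linear_1", "linear_2"],   # the linear modules listed in the log line above
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # should report roughly the same trainable/all counts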
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Using auto half precision backend |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - ***** Running training ***** |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Num examples = 100 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Num Epochs = 1 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Instantaneous batch size per device = 1 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 8 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Gradient Accumulation steps = 8 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Total optimization steps = 12 |
|
|
|
07/03/2024 15:08:02 - INFO - transformers.trainer - Number of trainable parameters = 5,023,744 |
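
The run summary above pins down most of the trainer settings, and the learning rates logged further down (3.1470e-05 at step 5, 3.3494e-06 at step 10) are exactly what a cosine schedule decaying from 5e-5 over 12 steps produces: 2.5e-5 * (1 + cos(pi * 5/12)) ~= 3.147e-5 and 2.5e-5 * (1 + cos(pi * 10/12)) ~= 3.35e-6. A hedged TrainingArguments sketch consistent with those numbers; the peak learning rate and scheduler type are inferred, not printed:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50",
    per_device_train_batch_size=1,   # "Instantaneous batch size per device = 1"
    gradient_accumulation_steps=8,   # effective batch size 1 * 8 = 8
    num_train_epochs=1,
    learning_rate=5e-5,              # inferred from the cosine-decayed values in the loss logs
    lr_scheduler_type="cosine",
    bf16=True,                       # "Using auto half precision backend" with bfloat16 weights
    gradient_checkpointing=True,
    logging_steps=5,
)
# 100 examples with an effective batch of 8 -> floor(100 / 8) = 12 optimizer steps per epoch,
# matching "Total optimization steps = 12".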
|
|
|
07/03/2024 15:08:04 - WARNING - transformers.models.llama.modeling_llama - `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
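
The warning above is expected rather than a problem: the KV cache only helps autoregressive generation and cannot coexist with activation recomputation, so during training the stack effectively does the equivalent of the following sketch:

model.gradient_checkpointing_enable()  # trade compute for memory: recompute activations in backward
model.config.use_cache = False         # disable the generation-time KV cache while training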
|
|
|
07/03/2024 15:10:44 - INFO - llamafactory.extras.callbacks - {'loss': 1.1881, 'learning_rate': 3.1470e-05, 'epoch': 0.40, 'throughput': 113.65} |
|
|
|
07/03/2024 15:13:42 - INFO - llamafactory.extras.callbacks - {'loss': 1.1695, 'learning_rate': 3.3494e-06, 'epoch': 0.80, 'throughput': 111.00} |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.trainer - Saving model checkpoint to saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/checkpoint-12 |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_token_index": 32000,
  "model_type": "llava_next",
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.42.3",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
|
|
|
|
|
07/03/2024 15:14:56 - INFO - transformers.image_processing_base - Image processor saved in saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/checkpoint-12/preprocessor_config.json |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.trainer - |
|
|
|
Training completed. Do not forget to share your model on huggingface.co/models =) |
|
|
|
|
|
|
|
07/03/2024 15:14:56 - INFO - transformers.trainer - Saving model checkpoint to saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50 |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--llava-hf--llava-v1.6-vicuna-7b-hf/snapshots/382e15404d46a627a15b299a41618e2efb04f2a7/config.json |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.configuration_utils - Model config LlavaNextConfig {
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_token_index": 32000,
  "model_type": "llava_next",
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.42.3",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
|
|
|
|
|
07/03/2024 15:14:56 - INFO - transformers.image_processing_base - Image processor saved in saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50/preprocessor_config.json |
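
Once the run finishes, the adapter saved in the output directory above can be reattached to the base model for inference. A minimal sketch using standard transformers + PEFT calls (this is not something the log itself performs):

import torch
from peft import PeftModel
from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor

base_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
adapter_dir = "saves/LLaVA1.5-7B-Chat/lora/train_2024-07-03-14-56-50"

model = LlavaNextForConditionalGeneration.from_pretrained(base_id, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(model, adapter_dir)   # attach the saved LoRA weights
processor = LlavaNextProcessor.from_pretrained(base_id)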
|
|
|
07/03/2024 15:14:56 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot. |
|
|
|
07/03/2024 15:14:56 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
|
|
|
|