# Spaces: Running on Zero
"""Caption the people in one image with a locally stored LLaVA-1.5 checkpoint.

The script loads the checkpoint once, builds the argument object that
``eval_model`` reads its settings from, runs the query against
``image_file`` and prints the resulting caption.
"""
import sys
from types import SimpleNamespace

# Must run before the `llava` imports below, which are expected to resolve
# from this directory.
sys.path.append("./Llava1.5/LLaVA")

# Detailed model can be viewed at https://github.com/haotian-liu/LLaVA
from llava.model.builder import load_pretrained_model  # noqa: E402
from llava.mm_utils import get_model_name_from_path  # noqa: E402
from llava.eval.run_llava import eval_model  # noqa: E402

# Alternative value noted by the original author: "liuhaotian/llava-v1.5-7b"
model_path = "./pretrained_model/llava-v1.5-7b"
prompt = "Please describe the people in the image, including their gender, age, clothing, facial expressions, and any other distinguishing features."
image_file = "./demo.png"

# Derived once and shared by the loader call and the argument object, so the
# two can never disagree.
model_name = get_model_name_from_path(model_path)

# Optional loader arguments left as hints by the original author:
# load_4bit=True, device="cuda".
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
)

# Plain attribute bag read by eval_model; the field names are kept exactly as
# in the original script.
args = SimpleNamespace(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
    query=prompt,
    conv_mode=None,
    image_file=image_file,
    sep=",",
    temperature=0,
    top_p=None,
    num_beams=1,
    max_new_tokens=512,
)

# NOTE(review): this passes the already-loaded tokenizer/model/image processor
# and uses the return value as the caption. That relies on the eval_model in
# ./Llava1.5/LLaVA having this signature and returning the text; confirm
# against that copy, since the upstream helper may differ.
outputs = eval_model(args, tokenizer, model, image_processor)
print(f"The caption is: {outputs}")