Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,163 Bytes
38e8cc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import sys
sys.path.append("./Llava1.5/LLaVA")
# Detailed model can be viewed at https://github.com/haotian-liu/LLaVA
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model
# Inputs for this captioning run: the local checkpoint directory, the
# instruction given to the model, and the image to describe.
# Original author's note next to the path: "liuhaotian/llava-v1.5-7b"
# (presumably the equivalent hub identifier -- confirm before switching).
model_path = "./pretrained_model/llava-v1.5-7b"
prompt = "Please describe the people in the image, including their gender, age, clothing, facial expressions, and any other distinguishing features."
image_file = "./demo.png"

# The model name is needed both by the loader and by the evaluation
# arguments, so derive it from the path a single time.
model_name = get_model_name_from_path(model_path)

# Load the tokenizer, model and image processor from the checkpoint.
# Options the original author left disabled: load_4bit=True, device="cuda".
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
)

# Bundle the evaluation settings as attributes of an ad-hoc ``Args`` object:
# the mapping becomes the class namespace, and the class is instantiated
# immediately so the settings are reachable as ``args.<name>``.
args = type(
    "Args",
    (),
    dict(
        model_path=model_path,
        model_base=None,
        model_name=model_name,
        query=prompt,
        conv_mode=None,
        image_file=image_file,
        sep=",",
        temperature=0,
        top_p=None,
        num_beams=1,
        max_new_tokens=512,
    ),
)()

# Run the evaluation with the already-loaded components and report the result.
outputs = eval_model(args, tokenizer, model, image_processor)
print(f"The caption is: {outputs}")
|