# Source: Hugging Face Space (status when captured: Sleeping)
# Minimal inference example for the MolmoE-1B vision-language model:
# download one image, ask the model to describe it, and print the answer.
import requests
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig

# Single source of truth for the checkpoint, so the processor and the model
# can never drift apart.
MODEL_NAME = "allenai/MolmoE-1B-0924"
IMAGE_URL = "https://picsum.photos/id/237/536/354"
PROMPT = "Describe this image."
# Seconds to wait on the image server before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# load the processor
processor = AutoProcessor.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto',
)

# load the model (exactly once: an earlier, immediately overwritten
# from_pretrained call only cost extra load time and memory)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto',
)

# fetch the image; fail loudly on HTTP errors rather than handing an error
# page to the image decoder
response = requests.get(IMAGE_URL, stream=True, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
image = Image.open(response.raw)

# process the image and text
inputs = processor.process(
    images=[image],
    text=PROMPT,
)

# move inputs to the correct device and make a batch of size 1
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

# generate output; maximum 200 new tokens; stop generation when <|endoftext|> is generated
output = model.generate_from_batch(
    inputs,
    GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
    tokenizer=processor.tokenizer,
)

# only get generated tokens (everything after the prompt); decode them to text
generated_tokens = output[0, inputs['input_ids'].size(1):]
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

# print the generated text
print(generated_text)
# >>> This photograph captures a small black puppy, likely a Labrador or a similar breed,
# sitting attentively on a weathered wooden deck. The deck, composed of three...
# import cv2
# class Solution():
#     def __init__(self,prompt):
#         self.prompt= prompt
#         self.output_dir=None
#     # Read an mp4 file and get its frames at a particular interval.
#     def read_frame(self,file,interval=1):
#         video=cv2.VideoCapture(file)
#         fps= video.get(cv2.CAP_PROP_FPS)
#         frame_interval= fps*interval  # e.g. fps = 24 frames/sec and interval = 1 sec gives frame_interval = 24 frames
#         while True:
#             success, frame=video.read()
#             if not success:
#                 break
#             # NOTE(review): `frame` appears to be the decoded image rather than a
#             # frame number, so this check presumably needs a running frame counter - confirm.
#             if frame % frame_interval==0:
#                 # process this frame
#                 """
#                 .. to do
#                 """
#     def find(self,input_message):
#         read a .mp4 file
#         get frames spaced at an interval of N