from transformers import AutoProcessor, AutoModelForCausalLM
import torch
from ultralytics import YOLO


def load_models(
    device='cpu',
    *,
    yolo_weights='best.pt',
    processor_name="microsoft/Florence-2-base",
    caption_model_name="microsoft/OmniParser/icon_caption_florence",
):
    """Initialize and load all required models.

    Loads a YOLO model, a Florence-2 processor and a causal-LM caption
    model, and moves the YOLO and caption models to ``device``.

    Note:
        This call sets torch's default dtype to ``float32`` via
        ``torch.set_default_dtype``; that setting is not restored when the
        function returns.

    Args:
        device: Device passed to ``.to()`` for the YOLO model and the
            caption model. Defaults to ``'cpu'``.
        yolo_weights: Weights file handed to ``YOLO(...)``.
        processor_name: Identifier handed to
            ``AutoProcessor.from_pretrained``.
        caption_model_name: Identifier handed to
            ``AutoModelForCausalLM.from_pretrained``.
            NOTE(review): the default has three path components, so it is
            not a ``namespace/name`` Hub repo id; presumably it resolves as
            a local directory -- confirm against the deployment layout.

    Returns:
        A dict with the keys ``'yolo_model'``, ``'processor'`` and
        ``'caption_model'`` mapping to the loaded objects.
    """
    # Set default dtype for torch
    torch.set_default_dtype(torch.float32)

    yolo_model = YOLO(yolo_weights).to(device)

    # trust_remote_code=True lets transformers run Python code shipped with
    # the model repository; only point these names at sources you trust.
    processor = AutoProcessor.from_pretrained(
        processor_name,
        trust_remote_code=True,
    )

    caption_model = AutoModelForCausalLM.from_pretrained(
        caption_model_name,
        # float32 instead of the earlier float16; presumably for CPU
        # compatibility -- TODO confirm.
        torch_dtype=torch.float32,
        trust_remote_code=True,
    ).to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model,
    }