from transformers import AutoProcessor, AutoModelForCausalLM
import torch
from ultralytics import YOLO

def load_models(
    device='cpu',
    *,
    yolo_weights='best.pt',
    processor_name="microsoft/Florence-2-base",
    caption_model_name="microsoft/OmniParser/icon_caption_florence",
    torch_dtype=None,
) -> dict:
    """Initialize and load all required models.

    Loads a YOLO model from a weights file, a Florence-2 processor, and a
    causal-LM caption model, and moves both models to ``device``.

    Args:
        device: Target passed to ``.to(...)`` for the YOLO model and the
            caption model. Defaults to ``'cpu'``.
        yolo_weights: Path or name handed to ``YOLO(...)``.
        processor_name: Identifier handed to
            ``AutoProcessor.from_pretrained``.
        caption_model_name: Identifier handed to
            ``AutoModelForCausalLM.from_pretrained``.
        torch_dtype: dtype the caption model is loaded with. ``None``
            (the default) means ``torch.float32``.

    Returns:
        A dict with the keys ``'yolo_model'``, ``'processor'`` and
        ``'caption_model'``.

    Note:
        Both ``from_pretrained`` calls use ``trust_remote_code=True``, which
        allows code shipped with the checkpoint to be executed; only point
        the identifiers at sources you trust.
    """
    if torch_dtype is None:
        # Historical default: the caption model was switched from float16
        # to float32.
        torch_dtype = torch.float32

    # Process-wide side effect: sets the default floating-point dtype for
    # every tensor created afterwards, not only for the models loaded here.
    torch.set_default_dtype(torch.float32)

    yolo_model = YOLO(yolo_weights).to(device)

    processor = AutoProcessor.from_pretrained(
        processor_name,
        trust_remote_code=True,
    )

    # NOTE(review): the default caption identifier has three path segments,
    # which does not look like a Hub "namespace/name" repo id; presumably it
    # resolves as a local directory (or needs repo id + subfolder) -- confirm.
    caption_model = AutoModelForCausalLM.from_pretrained(
        caption_model_name,
        torch_dtype=torch_dtype,
        trust_remote_code=True,
    ).to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model,
    }