Spaces:

ArissBandoss
/

DeepFake-Videos-Detection

Running

File size: 4,099 Bytes

caa56d6

import os
import cv2
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from training.detectors import DETECTOR
import yaml

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load the model
def load_model(model_name, config_path, weights_path):
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    
    config['model_name'] = model_name
    
    model_class = DETECTOR[model_name]
    model = model_class(config).to(device)
    
    checkpoint = torch.load(weights_path, map_location=device)
    model.load_state_dict(checkpoint, strict=True)
    model.eval()
    return model

# preprocess a single video
def preprocess_video(video_path, output_dir, frame_num=32):
    os.makedirs(output_dir, exist_ok=True)
    frames_dir = os.path.join(output_dir, "frames")
    os.makedirs(frames_dir, exist_ok=True)
    
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = np.linspace(0, total_frames - 1, frame_num, dtype=int)
    
    # extract frames
    frames = []
    for idx in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            frame_path = os.path.join(frames_dir, f"frame_{idx:04d}.png")
            cv2.imwrite(frame_path, frame)
            frames.append(frame_path)
    
    cap.release()
    return frames

# inference on a single video
def infer_video(video_path, model, device):
    output_dir = "temp_video_frames"
    frames = preprocess_video(video_path, output_dir)
    
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    probs = []
    for frame_path in frames:
        frame = Image.open(frame_path).convert("RGB")
        frame = transform(frame).unsqueeze(0).to(device)
        
        data_dict = {
            "image": frame,
            "label": torch.tensor([0]).to(device),  # Dummy label
            "label_spe": torch.tensor([0]).to(device),  # Dummy specific label
        }
        
        with torch.no_grad():
            pred_dict = model(data_dict, inference=True)
            
            logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
            prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
            probs.append(prob)
    
    avg_prob = np.mean(probs)
    prediction = "Fake" if avg_prob > 0.5 else "Real"
    return prediction, avg_prob

# main function for terminal-based inference
def main(video_filename, model_name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/config/detector/{model_name}.yaml"
    weights_path = f"/teamspace/studios/this_studio/DeepfakeBench/training/weights/{model_name}_best.pth"
    
    if not os.path.exists(config_path):
        print(f"Error: Config file for model '{model_name}' not found at {config_path}.")
        return
    if not os.path.exists(weights_path):
        print(f"Error: Weights file for model '{model_name}' not found at {weights_path}.")
        return
    
    model = load_model(model_name, config_path, weights_path)
    
    video_path = os.path.join(os.getcwd(), video_filename)
    if not os.path.exists(video_path):
        print(f"Error: Video file '{video_filename}' not found in the current directory.")
        return
    
    prediction, confidence = infer_video(video_path, model, device)
    print(f"Model: {model_name}")
    print(f"Prediction: {prediction} (Confidence: {confidence:.4f})")


if __name__ == "__main__":
    import sys
    if len(sys.argv) != 3:
        print("Usage: python inference_script.py <video_filename> <model_name>")
        print("Available models: xception, meso4, meso4Inception, efficientnetb4, ucf, etc.")
    else:
        video_filename = sys.argv[1]
        model_name = sys.argv[2]
        main(video_filename, model_name)