# Page header captured with this file (not part of the program):
# Space status "Runtime error"; file size 2,667 bytes; revision bab971b.
import numpy as np
import torch
from tqdm import tqdm
import math
from einops import rearrange
import sys
sys.path.append(".")
from opensora.eval.flolpips.pwcnet import Network as PWCNet
from opensora.eval.flolpips.flolpips import FloLPIPS
# Shared model instances, built once at import time. Both are put in eval mode
# with requires_grad_(False) applied; calculate_flolpips() moves them to the
# requested device on each call.
loss_fn = (
    FloLPIPS(net='alex', version='0.1')
    .eval()
    .requires_grad_(False)
)
flownet = (
    PWCNet()
    .eval()
    .requires_grad_(False)
)
def trans(x):
    """Return ``x`` unchanged.

    Identity pre-processing hook: ``calculate_flolpips`` passes both video
    batches through this function before scoring them.
    """
    return x
def calculate_flolpips(videos1, videos2, device):
    """Compute FloLPIPS scores between two batches of videos.

    For each video, the module-level ``flownet`` is run on consecutive frame
    pairs of both clips; the difference between the two flow fields is passed,
    together with the frames, to the module-level ``loss_fn``.

    Args:
        videos1: 5-D tensor ``[batch, time, channel, height, width]``; its
            frames are used as the reconstructed clips (``frames_rec``).
        videos2: 5-D tensor with the same layout; its frames are used as the
            ground-truth clips (``frames_gt``).
        device: Device handed to ``.to()`` for both models and every video.

    Returns:
        A dict with:
          * ``"value"`` / ``"value_std"``: mean / standard deviation over the
            batch, keyed by index along axis 1 of the score array.
          * ``"video_setting"``: shape of a single video.
          * ``"video_setting_name"``: label describing that shape.
          * ``"result"``: the raw scores as a NumPy array.
          * ``"details"``: the same scores as nested Python lists.
        For an empty batch, ``"value"`` and ``"value_std"`` are empty dicts
        and ``"result"`` is an empty ``(0, 0)`` array.

    Raises:
        ValueError: If either input is not 5-dimensional.
    """
    global loss_fn, flownet

    print("calculate_flowlpips...")

    # Explicit rank check; the original relied on a 4-way shape unpack of each
    # video, which raised the same exception type.
    if videos1.ndim != 5 or videos2.ndim != 5:
        raise ValueError(
            "videos must be 5-D [batch, time, channel, height, width]; got "
            f"{tuple(videos1.shape)} and {tuple(videos2.shape)}"
        )

    # Only the frame count is reconciled; any other mismatch is left as is.
    if videos1.shape != videos2.shape:
        print("Warning: the shape of videos are not equal.")
        min_frames = min(videos1.shape[1], videos2.shape[1])
        videos1 = videos1[:, :min_frames]
        videos2 = videos2[:, :min_frames]

    # Nothing to score: return an empty result before touching the models,
    # instead of failing later while indexing an empty score array.
    if videos1.shape[0] == 0:
        print("Warning: no videos to evaluate.")
        empty_scores = np.empty((0, 0))
        return {
            "value": {},
            "value_std": {},
            "video_setting": videos1.shape[1:],
            "video_setting_name": "time, channel, heigth, width",
            "result": empty_scores,
            "details": empty_scores.tolist(),
        }

    loss_fn = loss_fn.to(device)
    flownet = flownet.to(device)

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    flolpips_results = []
    # Pure inference: no autograd graph is needed, and disabling it keeps
    # ``.numpy()`` valid even if the inputs require grad.
    with torch.no_grad():
        for video_num in tqdm(range(videos1.shape[0])):
            video1 = videos1[video_num].to(device)
            video2 = videos2[video_num].to(device)

            # Consecutive frame pairs: frame i paired with frame i + 1.
            frames_rec = video1[:-1]
            frames_rec_next = video1[1:]
            frames_gt = video2[:-1]
            frames_gt_next = video2[1:]

            flow_gt = flownet(frames_gt, frames_gt_next)
            flow_dis = flownet(frames_rec, frames_rec_next)
            flow_diff = flow_gt - flow_dis

            flolpips = loss_fn.forward(
                frames_gt, frames_rec, flow_diff, normalize=True
            )
            flolpips_results.append(flolpips.cpu().numpy().tolist())

    # Presumably [batch_size, num_frame_pairs]; depends on what loss_fn
    # returns per video -- TODO confirm.
    flolpips_results = np.array(flolpips_results)

    flolpips = {}
    flolpips_std = {}
    for clip_timestamp in range(flolpips_results.shape[1]):
        scores_at_t = flolpips_results[:, clip_timestamp]
        flolpips[clip_timestamp] = np.mean(scores_at_t, axis=-1)
        flolpips_std[clip_timestamp] = np.std(scores_at_t, axis=-1)

    result = {
        "value": flolpips,
        "value_std": flolpips_std,
        "video_setting": video1.shape,
        "video_setting_name": "time, channel, heigth, width",
        "result": flolpips_results,
        "details": flolpips_results.tolist(),
    }
    return result
def _json_default(obj):
    """Convert NumPy arrays and scalars to plain Python values for ``json``."""
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


# test code / using example
def main():
    """Score two all-zero video batches and print the result as JSON."""
    import json

    NUMBER_OF_VIDEOS = 8
    VIDEO_LENGTH = 50
    CHANNEL = 3
    SIZE = 64
    videos1 = torch.zeros(
        NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False
    )
    videos2 = torch.zeros(
        NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False
    )

    # Fall back to CPU when CUDA is unavailable rather than failing on
    # ``.to("cuda:0")``. NOTE(review): confirm the models themselves run on CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    result = calculate_flolpips(videos1, videos2, device)

    # ``result["result"]`` is a NumPy array, which ``json`` cannot encode on
    # its own; the ``default`` hook converts it to nested lists.
    print(json.dumps(result, indent=4, default=_json_default))


if __name__ == "__main__":
    main()