import tensorflow as tf
import cv2
import gradio as gr
import random
import numpy as np
import pandas as pd
tflite_filename = 'model-400.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_filename)
runner = interpreter.get_signature_runner()

# Build the initial (all-zero) stream states from the signature's input specs;
# 'image' is the per-frame input rather than a state, so it is removed.
init_states = {
    name: tf.zeros(x['shape'], dtype=x['dtype'])
    for name, x in runner.get_input_details().items()
}
del init_states['image']
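
# A quick sanity check of the streaming signature (assuming the standard
# MoViNet stream export, where every non-'image' input is a recurrent state):
#
#   for name, state in init_states.items():
#       print(name, state.shape)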
data = pd.read_csv('labels.csv', header=None)
CLASSES = data[1].values
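# labels.csv is assumed to be a headerless two-column file mapping a class
# index to its name (illustrative rows, Kinetics-style):
#
#   0,abseiling
#   1,air drumming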

def format_frames(frame, output_size):
    """
    Pad and resize an image from a video.

    Args:
        frame: Image that needs to be resized and padded.
        output_size: Pixel size of the output frame image.

    Return:
        Formatted frame with padding of the specified output size.
    """
    frame = tf.image.convert_image_dtype(frame, tf.float32)
    frame = tf.image.resize_with_pad(frame, *output_size)
    return frame
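
# For example (hypothetical frame, such as one returned by cv2), a 480x640
# uint8 image becomes a float32 224x224 tensor with letterbox padding:
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   out = format_frames(frame, (224, 224))   # shape (224, 224, 3), float32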

def frames_from_video_file(video_path, n_frames, output_size=(224, 224)):
    """
    Samples n_frames evenly spaced frames from a video file.

    Args:
        video_path: File path to the video.
        n_frames: Number of frames to be created per video file.
        output_size: Pixel size of the output frame image.

    Return:
        A NumPy array of frames in the shape of
        (1, n_frames, height, width, channels).
    """
    # Read each video frame by frame
    result = []
    src = cv2.VideoCapture(str(video_path))
    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_step = max(1, video_length // n_frames)
    need_length = 1 + (n_frames - 1) * frame_step

    # Pick a random starting frame so different calls sample different
    # segments; randint is inclusive on both ends, so the upper bound is
    # max_start, not max_start + 1.
    if need_length > video_length:
        start = 0
    else:
        max_start = video_length - need_length
        start = random.randint(0, max_start)
    src.set(cv2.CAP_PROP_POS_FRAMES, start)

    # ret is a boolean indicating whether the read was successful;
    # frame is the image itself.
    ret, frame = src.read()
    if not ret:
        raise ValueError(f'Could not read a frame from {video_path}')
    result.append(format_frames(frame, output_size))

    for _ in range(n_frames - 1):
        for _ in range(frame_step):
            ret, frame = src.read()
        if ret:
            frame = format_frames(frame, output_size)
            result.append(frame)
        else:
            result.append(np.zeros_like(result[0]))
    src.release()

    # OpenCV reads frames as BGR; reorder the last axis to RGB and add a
    # leading batch dimension.
    result = np.array(result)[..., [2, 1, 0]].reshape((1, n_frames, *output_size, 3))
    return result
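
# Usage sketch ('sample.mp4' is a placeholder path): sampling 13 frames
# yields a batch ready for the streaming model.
#
#   batch = frames_from_video_file('sample.mp4', 13)
#   assert batch.shape == (1, 13, 224, 224, 3)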

def get_top_k(probs, k=5, label_map=CLASSES):
    """Outputs the top k model labels and probabilities on the given video."""
    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
    top_labels = tf.gather(label_map, top_predictions, axis=-1)
    # tf.gather yields byte strings; decode them back to Python str.
    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
    return tuple(zip(top_labels, top_probs))
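
# Illustrative check with a dummy distribution (the actual label strings
# depend entirely on labels.csv):
#
#   dummy = tf.nn.softmax(tf.random.uniform([len(CLASSES)]))
#   print(get_top_k(dummy, k=3))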

def inference(video):
    # Sample 13 frames from the uploaded video, then run them through the
    # streaming model one frame at a time, carrying the states forward.
    video = frames_from_video_file(video, 13)
    clips = tf.split(video, video.shape[1], axis=1)
    states = init_states
    for clip in clips:
        outputs = runner(**states, image=clip)
        logits = outputs.pop('logits')[0]
        states = outputs
    probs = tf.nn.softmax(logits)
    top_k = get_top_k(probs)
    result_str = '\n'.join([f'{label}: {prob:.4f}' for label, prob in top_k])
    return result_str
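
# inference returns a newline-separated "label: probability" string for the
# top-5 classes, which Gradio renders in the text output box, e.g.:
#
#   print(inference('sample.mp4'))   # 'sample.mp4' is a placeholder path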

demo = gr.Interface(fn=inference, inputs='video', outputs='text')
demo.launch()