|
|
|
import numpy as np |
|
import argparse |
|
import os |
|
import re |
|
import requests |
|
from pathlib import Path |
|
# Create the local 'out/' directory at import time (parents included, no error
# if it already exists); cli() writes the server response into './out/'.
Path('out/').mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def alpha_num(f):
    """Reduce a string to ASCII letters, digits and spaces.

    Runs of consecutive spaces are first collapsed to a single space, then
    every character that is not ``A-Z``, ``a-z``, ``0-9`` or a space is
    dropped. Because the collapse happens first, removing a character that
    sat between two spaces can still leave two adjacent spaces.

    Args:
        f: Input string (e.g. a file name or voice identifier).

    Returns:
        The filtered string.
    """
    single_spaced = re.sub(r' {2,}', ' ', f)
    # Keeping only the allowed characters is the same as deleting the rest.
    kept_chars = re.findall(r'[A-Za-z0-9 ]', single_spaced)
    return ''.join(kept_chars)
|
|
|
|
|
def command_line_args():
    """Build the argument parser for the TTS client.

    The parser is returned un-parsed so the caller decides which argv to
    feed it.

    Returns:
        argparse.ArgumentParser: parser exposing ``--affective``,
        ``--device``, ``--text``, ``--soundscape``, ``--native``,
        ``--voice``, ``--image``, ``--video``, ``--out_file`` and
        ``--speed``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # store_false: args.affective is True unless the flag is passed.
    parser.add_argument(
        '--affective',
        help="Select Emotional or non-emotional variant of Available voices: https://audeering.github.io/shift/",
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='sample.txt',
        type=str,
    )
    # nargs='?': absent -> None, bare `--soundscape` -> const, else the value.
    parser.add_argument(
        '--soundscape',
        help='soundscape - MUST BE IN BRACKETS: "forest"',
        default=None,
        nargs='?',
        type=str,
        const='wind fjord',
    )
    # NOTE(review): the help text describes a no-argument form, but without
    # nargs='?' argparse requires a value for --native -- confirm intent.
    parser.add_argument(
        '--native',
        help="""

--native: (without argument) a flag to do voice cloning using the speech from --video,

--native my_voice.wav: Voice cloning from user provided audio""",
    )
    parser.add_argument(
        '--voice',
        help="TTS voice - Available voices: https://audeering.github.io/shift/",
        default="en_US/m-ailabs_low#judy_bieber",
        type=str,
    )
    parser.add_argument(
        '--image',
        help="If provided is set as background for output video, see --text",
        type=str,
    )
    parser.add_argument(
        '--video',
        help="Video file for video translation. Voice cloned from the video",
        type=str,
    )
    parser.add_argument(
        '--out_file',
        help="Output file name.",
        type=str,
        default=None
    )
    # type=float matches the float default, so args.speed always has one type
    # and non-numeric input is rejected by argparse instead of being sent on.
    parser.add_argument(
        '--speed',
        help='speed of TTS (only used in Non English voices).',
        type=float,
        default=1.24,
    )
    return parser
|
|
|
def _open_optional(path, banner):
    """Open an optional upload for binary reading; None if unset or missing."""
    if path is None:
        return None
    print(banner)
    try:
        return open(path, 'rb')
    except FileNotFoundError:
        # Best effort: the path string is still sent in the form payload,
        # only the file upload itself is skipped.
        print(f'{path} not found locally; sending its name without uploading the file')
        return None


def send_to_server(args, url="http://192.168.88.209:5000"):
    """POST the synthesis request and its input files to the TTS server.

    The option values travel as form fields. The text file is always
    uploaded; image, video and native-voice files are uploaded only when the
    corresponding option is set and the file exists locally. Each upload uses
    its own path as the multipart field name.

    Args:
        args: Parsed namespace with ``affective``, ``voice``, ``soundscape``,
            ``native``, ``text``, ``image``, ``video`` and ``speed``.
        url: Server endpoint; defaults to the previously hard-coded address.

    Returns:
        requests.Response: the server's response.

    Raises:
        FileNotFoundError: if ``args.text`` does not exist.
    """
    payload = {
        'affective': args.affective,
        'voice': args.voice,
        'soundscape': args.soundscape,
        'native': args.native,
        'text': args.text,
        'image': args.image,
        'video': args.video,
        'speed': args.speed,
    }

    optional_inputs = (
        (args.image, '\nLOADING IMAGE\n'),
        (args.video, '\nLOADING vid\n'),
        (args.native, '\nLOADING natv\n'),
    )

    opened = []
    try:
        text_file = open(args.text, 'rb')
        opened.append(text_file)
        uploads = [(args.text, text_file)]

        for path, banner in optional_inputs:
            handle = _open_optional(path, banner)
            if handle is not None:
                opened.append(handle)
                uploads.append((path, handle))

        # The response body is fully read before post() returns (no
        # streaming), so the uploads can be closed right afterwards.
        return requests.post(url, data=payload, files=uploads)
    finally:
        for handle in opened:
            handle.close()
|
|
|
|
|
def cli():
    """Parse the command line, query the server and save its reply.

    When ``--out_file`` is not given, a name is derived from the sanitized
    text path, voice and video path; without a video a short random number
    string is used instead. The response body is written to
    ``./out/<out_file>.<ext>``, where ``<ext>`` is the part after the last
    dot of the ``suffix-file-type`` response header.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the response lacks the ``suffix-file-type`` header.
    """
    args = command_line_args().parse_args()

    if args.out_file is None:
        # First six characters of a random float, used to tell runs apart.
        vid = alpha_num(args.video) if args.video else f'{np.random.rand()*1e7}'[:6]
        args.out_file = alpha_num(args.text) + '_' + alpha_num(args.voice) + '_' + vid

    response = send_to_server(args)
    # Surface server-side failures as an HTTP error instead of a missing
    # header or an error page saved as if it were media.
    response.raise_for_status()

    suffix = response.headers['suffix-file-type'].split('.')[-1]
    with open('./out/' + args.out_file + '.' + suffix, 'wb') as f:
        f.write(response.content)
|
|
|
|
|
|
|
# Run the command-line client only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    cli()
|
|
|
|
|
|
|
|