Eempostor committed on
Commit
7e96eb9
·
verified ·
1 Parent(s): 8033a00

Upload audio.py

Browse files
Files changed (1) hide show
  1. lib/infer_libs/audio.py +87 -0
lib/infer_libs/audio.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import av
3
+ import ffmpeg
4
+ import os
5
+ import traceback
6
+ import sys
7
+ import subprocess
8
+
9
# Expected location of the stftpitchshift executable for each supported
# ``sys.platform`` value, resolved against the working directory at import time.
platform_stft_mapping = {
    platform_name: os.path.join(os.getcwd(), executable)
    for platform_name, executable in (
        ('linux', 'stftpitchshift'),
        ('darwin', 'stftpitchshift'),
        ('win32', 'stftpitchshift.exe'),
    )
}

# Executable path for the running platform; None when the platform is not listed.
stft = platform_stft_mapping.get(sys.platform)
16
+
17
def wav2(i, o, format):
    """Re-encode the first audio stream of ``i`` into ``o`` using PyAV.

    Args:
        i: Input file (path or file-like object) passed to ``av.open``.
        o: Output file (path or file-like object) passed to ``av.open``.
        format: Requested output format. ``"m4a"`` is written into an
            ``"mp4"`` container. ``"ogg"`` is encoded with ``libvorbis`` and
            ``"mp4"``/``"m4a"`` with ``aac``. Any other value is used
            unchanged as both the container format and the codec name.
    """
    # The "m4a" request is written into an "mp4" container.
    container_format = format
    if container_format == "m4a":
        container_format = "mp4"

    # add_stream() takes a codec name, which differs from the container
    # name for these two formats.
    codec_name = container_format
    if codec_name == "ogg":
        codec_name = "libvorbis"
    if codec_name == "mp4":
        codec_name = "aac"

    # The context managers close both containers even when decoding,
    # encoding or muxing raises. The output is closed before the input,
    # matching the original close order.
    with av.open(i, 'rb') as inp, av.open(o, 'wb', format=container_format) as out:
        ostream = out.add_stream(codec_name)

        for frame in inp.decode(audio=0):
            for packet in ostream.encode(frame):
                out.mux(packet)

        # Encoding None flushes whatever the encoder still has buffered.
        for packet in ostream.encode(None):
            out.mux(packet)
33
+
34
def load_audio(file, sr, DoFormant=False, Quefrency=1.0, Timbre=1.0):
    """Decode an audio file into a mono float32 waveform at sample rate ``sr``.

    Decoding is delegated to the ffmpeg CLI through the ``ffmpeg-python``
    package (approach taken from
    https://github.com/openai/whisper/blob/main/whisper/audio.py#L26), which
    down-mixes and resamples as necessary.

    Args:
        file: Path to the audio file. Surrounding spaces, newlines and double
            quotes (common when a path is copy-pasted) are stripped.
        sr: Target sample rate passed to ffmpeg as ``ar``.
        DoFormant: When true, first run the external ``stftpitchshift``
            executable on the file and decode its output instead. The output
            is written to the current working directory as
            ``<name>_formanted<ext>``.
        Quefrency: Value passed to ``stftpitchshift`` as ``-q``.
        Timbre: Value passed to ``stftpitchshift`` as ``-t``.

    Returns:
        A one-dimensional ``numpy.float32`` array of samples.

    Raises:
        RuntimeError: If the path does not exist, or if formant shifting or
            decoding fails (the underlying exception is chained).
    """
    file = file.strip(' \n"')
    if not os.path.exists(file):
        raise RuntimeError(
            "Wrong audio path, that does not exist."
        )

    try:
        if DoFormant:
            if stft is None:
                raise RuntimeError(
                    "stftpitchshift is not available for this platform."
                )
            print("Starting formant shift. Please wait as this process takes a while.")
            stem, extension = os.path.splitext(os.path.basename(file))
            formanted_file = f"{stem}_formanted{extension}"
            # An argument list (no shell) keeps paths with spaces or quotes
            # intact and prevents the file name from being interpreted as
            # shell syntax. check=True surfaces a failed run instead of
            # silently decoding a missing or stale output file.
            subprocess.run(
                [
                    stft,
                    "-i", file,
                    "-q", str(Quefrency),
                    "-t", str(Timbre),
                    "-o", formanted_file,
                ],
                check=True,
            )
            file = formanted_file
            print(f"Formanted {file}\n")

        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )

        return np.frombuffer(out, np.float32).flatten()

    except Exception as e:
        # ffmpeg errors carry the real diagnostic in their captured stderr;
        # prefer it over the generic exception text when it is present.
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes) and stderr:
            detail = stderr.decode(errors="replace")
        else:
            detail = e
        raise RuntimeError(f"Failed to load audio: {detail}") from e
70
+
71
def check_audio_duration(file):
    """Report whether an audio file is long enough to be processed.

    Args:
        file: Path to the audio file. Surrounding spaces, newlines and double
            quotes are stripped before probing.

    Returns:
        ``False`` (after printing a warning) when the probed duration is
        below 0.76 seconds, ``True`` otherwise.

    Raises:
        RuntimeError: If probing fails or no duration can be determined (the
            underlying exception is chained).
    """
    try:
        # Same clean-up as load_audio so both resolve the same path.
        file = file.strip(' \n"')

        probe = ffmpeg.probe(file)

        # Prefer the first stream's duration; some containers only report a
        # duration at the format level, so fall back to that.
        streams = probe.get('streams') or [{}]
        raw_duration = streams[0].get('duration')
        if raw_duration is None:
            raw_duration = probe.get('format', {}).get('duration')
        if raw_duration is None:
            raise ValueError("no duration reported for this file")

        duration = float(raw_duration)

        if duration < 0.76:
            # os.path.basename also handles Windows path separators.
            print(
                f"Audio file, {os.path.basename(file)}, under ~0.76s detected - file is too short. Target at least 1-2s for best results."
            )
            return False

        return True
    except Exception as e:
        raise RuntimeError(f"Failed to check audio duration: {e}") from e