Eempostor committed on
Commit
8033a00
·
verified ·
1 Parent(s): adaa554

Delete lib/infer_libs/audio.py

Browse files
Files changed (1) hide show
  1. lib/infer_libs/audio.py +0 -142
lib/infer_libs/audio.py DELETED
@@ -1,142 +0,0 @@
1
- import librosa
2
- import numpy as np
3
- import av
4
- from io import BytesIO
5
- import ffmpeg
6
- import os
7
- import traceback
8
- import sys
9
- import random
10
- import subprocess
11
-
12
# Expected location of the `stftpitchshift` executable for each supported
# `sys.platform` value. The path is resolved against the working directory
# at import time; only the Windows entry carries an `.exe` suffix.
platform_stft_mapping = {
    platform_name: os.path.join(os.getcwd(), binary_name)
    for platform_name, binary_name in (
        ('linux', 'stftpitchshift'),
        ('darwin', 'stftpitchshift'),
        ('win32', 'stftpitchshift.exe'),
    )
}

# Executable path for the running platform, or None when the platform is not
# one of the keys above.
stft = platform_stft_mapping.get(sys.platform)
19
-
20
def wav2(i, o, format):
    """Transcode the first audio stream of ``i`` into ``o``.

    Args:
        i: Input path or readable binary file object, passed to ``av.open``.
        o: Output path or writable binary file object, passed to ``av.open``.
        format: Requested output format. ``"m4a"`` is written into an
            ``mp4`` container; ``"ogg"`` is encoded with ``libvorbis`` and
            ``"m4a"``/``"mp4"`` with ``aac``. Any other value is used both as
            the container format and as the codec name.

    Both containers are closed even when decoding, encoding or muxing raises.
    """
    # Keep container format and codec name in separate locals instead of
    # repeatedly rebinding the ``format`` parameter.
    container_format = "mp4" if format == "m4a" else format
    codec_name = {"ogg": "libvorbis", "mp4": "aac"}.get(
        container_format, container_format
    )

    inp = av.open(i, 'rb')
    try:
        out = av.open(o, 'wb', format=container_format)
        try:
            ostream = out.add_stream(codec_name)

            for frame in inp.decode(audio=0):
                for p in ostream.encode(frame):
                    out.mux(p)

            # Passing None flushes packets still buffered inside the encoder.
            for p in ostream.encode(None):
                out.mux(p)
        finally:
            out.close()
    finally:
        inp.close()
36
-
37
def audio2(i, o, format, sr):
    """Re-encode the first audio stream of ``i`` into ``o`` as one channel.

    Args:
        i: Input path or readable binary file object, passed to ``av.open``.
        o: Output path or writable binary file object, passed to ``av.open``.
        format: Output container format. ``"ogg"`` is encoded with
            ``libvorbis`` and ``"f32le"`` with ``pcm_f32le``; any other value
            is also used as the codec name.
        sr: Sample rate assigned to the output stream.

    Both containers are closed even when decoding, encoding or muxing raises.
    """
    codec_name = {"ogg": "libvorbis", "f32le": "pcm_f32le"}.get(format, format)

    inp = av.open(i, 'rb')
    try:
        out = av.open(o, 'wb', format=format)
        try:
            ostream = out.add_stream(codec_name, channels=1)
            ostream.sample_rate = sr

            for frame in inp.decode(audio=0):
                for p in ostream.encode(frame):
                    out.mux(p)

            # Flush the encoder, as wav2 does; without this any packets still
            # buffered in the encoder never reach the output.
            for p in ostream.encode(None):
                out.mux(p)
        finally:
            out.close()
    finally:
        inp.close()
51
-
52
def load_audion(file, sr):
    """Load audio as a flat float32 NumPy array.

    Args:
        file: Either a path string, or an object without a ``strip`` method
            that is indexed as ``(sample_rate, samples)``.
            NOTE(review): the ``/ 32768.0`` scaling suggests ``samples`` is
            int16 PCM in a NumPy array - confirm against callers.
        sr: Sample rate requested from ``audio2`` when decoding a path.

    Returns:
        For a path: the decoded ``f32le`` stream as a 1-D float32 array.
        For an in-memory pair: the samples divided by 32768.0, averaged over
        the last axis when 2-D, and resampled with ``librosa.resample``.

    Raises:
        RuntimeError: If reading or decoding the path fails; the original
            exception is attached as ``__cause__``.
    """
    # Decide on the input kind up front. Relying on an AttributeError from
    # ``file.strip`` would also swallow unrelated AttributeErrors raised
    # while decoding and send them down the in-memory branch.
    if not hasattr(file, "strip"):
        audio = file[1] / 32768.0
        if len(audio.shape) == 2:
            audio = np.mean(audio, -1)
        # NOTE(review): the target rate is fixed at 16000 and ignores ``sr``;
        # kept as-is for compatibility - confirm this is intended.
        return librosa.resample(audio, orig_sr=file[0], target_sr=16000)

    try:
        # Guard against pasted paths that carry leading/trailing spaces,
        # double quotes or newlines.
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        with open(file, "rb") as f, BytesIO() as out:
            audio2(f, out, "f32le", sr)
            return np.frombuffer(out.getvalue(), np.float32).flatten()
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}") from e
70
-
71
def load_audio(file, sr, DoFormant=False, Quefrency=1.0, Timbre=1.0):
    """Load audio as a flat float32 NumPy array, optionally formant-shifted.

    Args:
        file: Either a path string, or an object without a ``strip`` method
            that is indexed as ``(sample_rate, samples)``.
            NOTE(review): the ``/ 32768.0`` scaling suggests ``samples`` is
            int16 PCM in a NumPy array - confirm against callers.
        sr: Sample rate requested from ``audio2`` when decoding a path.
        DoFormant: When true, run the external ``stft`` executable on the
            file before decoding.
        Quefrency: Value passed to the executable's ``-q`` option.
        Timbre: Value passed to the executable's ``-t`` option.

    Returns:
        For a path: the decoded ``f32le`` stream as a 1-D float32 array.
        For an in-memory pair: the samples divided by 32768.0, averaged over
        the last axis when 2-D, and resampled with ``librosa.resample``.

    Raises:
        RuntimeError: If the path does not exist, or if conversion, formant
            shifting or decoding fails (the message is the formatted
            traceback and the original exception is ``__cause__``).
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    import tempfile

    # In-memory input. Previously this fallback sat behind an
    # ``except AttributeError`` that the ``strip`` call below (outside the
    # ``try``) could never reach.
    if not hasattr(file, "strip"):
        audio = file[1] / 32768.0
        if len(audio.shape) == 2:
            audio = np.mean(audio, -1)
        # NOTE(review): the target rate is fixed at 16000 and ignores ``sr``;
        # kept as-is for compatibility - confirm this is intended.
        return librosa.resample(audio, orig_sr=file[0], target_sr=16000)

    file = file.strip(' \n"')
    if not os.path.exists(file):
        raise RuntimeError(
            "Wrong audio path, that does not exist."
        )

    try:
        # All intermediate files live in a private temporary directory, so
        # they can neither overwrite nor cause deletion of a user file with
        # the same basename in the working directory, and they are removed
        # on success and on failure alike.
        with tempfile.TemporaryDirectory(prefix="load_audio_") as workdir:
            stem = os.path.splitext(os.path.basename(file))[0]

            # Case-insensitive so that e.g. "clip.WAV" is not re-converted.
            if not file.lower().endswith(".wav"):
                formatted_file = os.path.join(workdir, f"{stem}.wav")
                result = subprocess.run(
                    ["ffmpeg", "-nostdin", "-y", "-i", file, formatted_file],
                    capture_output=True,
                    text=True,
                    errors="replace",
                )
                if result.returncode != 0:
                    raise RuntimeError(
                        f"ffmpeg could not convert {file!r} to wav: "
                        f"{result.stderr.strip()}"
                    )
                file = formatted_file
                print(f"File formatted to wav format: {file}\n")

            if DoFormant:
                if not stft:
                    raise RuntimeError(
                        f"Formant shifting is not available on platform {sys.platform!r}."
                    )
                print("Starting formant shift. Please wait as this process takes a while.")
                extension = os.path.splitext(file)[1]
                formanted_file = os.path.join(
                    workdir, f"{stem}_formanted{extension}"
                )
                # Argument list instead of a shell string: quotes or shell
                # metacharacters in the path cannot alter the command.
                subprocess.run(
                    [
                        stft,
                        "-i", file,
                        "-q", str(Quefrency),
                        "-t", str(Timbre),
                        "-o", formanted_file,
                    ],
                    check=True,
                )
                file = formanted_file
                print(f"Formanted {file}\n")

            with open(file, "rb") as f, BytesIO() as out:
                audio2(f, out, "f32le", sr)
                return np.frombuffer(out.getvalue(), np.float32).flatten()
    except Exception as error:
        raise RuntimeError(traceback.format_exc()) from error
125
-
126
def check_audio_duration(file):
    """Report whether an audio file is long enough to be processed.

    Args:
        file: Path string; surrounding spaces, double quotes and newlines
            are stripped before probing.

    Returns:
        ``False`` (after printing a warning) when the probed duration is
        below 0.76 seconds, otherwise ``True``.

    Raises:
        RuntimeError: If probing fails or no duration is reported; the
            original exception is attached as ``__cause__``.
    """
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")

        probe = ffmpeg.probe(file)
        streams = probe.get('streams') or []

        # Prefer an audio stream: the first stream may be cover art or video.
        # Then fall back to the first stream (the previous behaviour), and
        # finally to the container-level duration for formats that do not
        # report one per stream.
        candidates = [s for s in streams if s.get('codec_type') == 'audio']
        candidates += streams[:1]
        candidates.append(probe.get('format') or {})
        raw_duration = next(
            (c['duration'] for c in candidates if c.get('duration') is not None),
            None,
        )
        if raw_duration is None:
            raise ValueError("no duration reported by the probe")

        duration = float(raw_duration)

        if duration < 0.76:
            print(
                f"Audio file, {os.path.basename(file)}, under ~0.76s detected - file is too short. Target at least 1-2s for best results."
            )
            return False

        return True
    except Exception as e:
        raise RuntimeError(f"Failed to check audio duration: {e}") from e